LCOV - code coverage report
Current view: top level - module/bdev/nvme - bdev_nvme.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 2648 4945 53.5 %
Date: 2024-11-04 10:29:04 Functions: 225 325 69.2 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2016 Intel Corporation. All rights reserved.
       3             :  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
       4             :  *   Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  *   Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
       6             :  */
       7             : 
       8             : #include "spdk/stdinc.h"
       9             : 
      10             : #include "bdev_nvme.h"
      11             : 
      12             : #include "spdk/accel.h"
      13             : #include "spdk/config.h"
      14             : #include "spdk/endian.h"
      15             : #include "spdk/bdev.h"
      16             : #include "spdk/json.h"
      17             : #include "spdk/keyring.h"
      18             : #include "spdk/likely.h"
      19             : #include "spdk/nvme.h"
      20             : #include "spdk/nvme_ocssd.h"
      21             : #include "spdk/nvme_zns.h"
      22             : #include "spdk/opal.h"
      23             : #include "spdk/thread.h"
      24             : #include "spdk/trace.h"
      25             : #include "spdk/string.h"
      26             : #include "spdk/util.h"
      27             : #include "spdk/uuid.h"
      28             : 
      29             : #include "spdk/bdev_module.h"
      30             : #include "spdk/log.h"
      31             : 
      32             : #include "spdk_internal/usdt.h"
      33             : #include "spdk_internal/trace_defs.h"
      34             : 
      35             : #define CTRLR_STRING(nvme_ctrlr) \
      36             :         (spdk_nvme_trtype_is_fabrics(nvme_ctrlr->active_path_id->trid.trtype) ? \
      37             :         nvme_ctrlr->active_path_id->trid.subnqn : nvme_ctrlr->active_path_id->trid.traddr)
      38             : 
      39             : #define CTRLR_ID(nvme_ctrlr)    (spdk_nvme_ctrlr_get_id(nvme_ctrlr->ctrlr))
      40             : 
      41             : #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \
      42             :         SPDK_ERRLOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      43             : 
      44             : #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \
      45             :         SPDK_WARNLOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      46             : 
      47             : #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \
      48             :         SPDK_NOTICELOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      49             : 
      50             : #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \
      51             :         SPDK_INFOLOG(bdev_nvme, "[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      52             : 
      53             : #ifdef DEBUG
      54             : #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \
      55             :         SPDK_DEBUGLOG(bdev_nvme, "[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      56             : #else
      57             : #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0)
      58             : #endif
      59             : 
      60             : #define BDEV_STRING(nbdev) (nbdev->disk.name)
      61             : 
      62             : #define NVME_BDEV_ERRLOG(nbdev, format, ...) \
      63             :         SPDK_ERRLOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
      64             : 
      65             : #define NVME_BDEV_WARNLOG(nbdev, format, ...) \
      66             :         SPDK_WARNLOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
      67             : 
      68             : #define NVME_BDEV_NOTICELOG(nbdev, format, ...) \
      69             :         SPDK_NOTICELOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
      70             : 
      71             : #define NVME_BDEV_INFOLOG(nbdev, format, ...) \
      72             :         SPDK_INFOLOG(bdev_nvme, "[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
      73             : 
      74             : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
      75             : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
      76             : 
      77             : #define NSID_STR_LEN 10
      78             : 
      79             : #define SPDK_CONTROLLER_NAME_MAX 512
      80             : 
      81             : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
      82             : 
      83             : struct nvme_bdev_io {
      84             :         /** array of iovecs to transfer. */
      85             :         struct iovec *iovs;
      86             : 
      87             :         /** Number of iovecs in iovs array. */
      88             :         int iovcnt;
      89             : 
      90             :         /** Current iovec position. */
      91             :         int iovpos;
      92             : 
      93             :         /** Offset in current iovec. */
      94             :         uint32_t iov_offset;
      95             : 
      96             :         /** Offset in current fused iovec. */
      97             :         uint32_t fused_iov_offset;
      98             : 
      99             :         /** Array of fused iovecs to transfer. */
     100             :         struct iovec *fused_iovs;
     101             : 
     102             :         /** Number of iovecs in fused_iovs array. */
     103             :         int fused_iovcnt;
     104             : 
     105             :         /** Current fused iovec position. */
     106             :         int fused_iovpos;
     107             : 
     108             :         /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
     109             :          *  being reset in a reset I/O.
     110             :          */
     111             :         struct nvme_io_path *io_path;
     112             : 
     113             :         /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
     114             :         struct spdk_nvme_cpl cpl;
     115             : 
     116             :         /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
     117             :         struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
     118             : 
     119             :         /** Keeps track if first of fused commands was submitted */
     120             :         bool first_fused_submitted;
     121             : 
     122             :         /** Keeps track if first of fused commands was completed */
     123             :         bool first_fused_completed;
     124             : 
     125             :         /* How many times the current I/O was retried. */
     126             :         int32_t retry_count;
     127             : 
     128             :         /** Expiration value in ticks to retry the current I/O. */
     129             :         uint64_t retry_ticks;
     130             : 
     131             :         /** Temporary pointer to zone report buffer */
     132             :         struct spdk_nvme_zns_zone_report *zone_report_buf;
     133             : 
     134             :         /** Keeps track of how many zones have been copied to the spdk_bdev_zone_info struct */
     135             :         uint64_t handled_zones;
     136             : 
     137             :         /* Current tsc at submit time. */
     138             :         uint64_t submit_tsc;
     139             : 
     140             :         /* Used to put nvme_bdev_io into the list */
     141             :         TAILQ_ENTRY(nvme_bdev_io) retry_link;
     142             : };
     143             : 
     144             : struct nvme_probe_skip_entry {
     145             :         struct spdk_nvme_transport_id           trid;
     146             :         TAILQ_ENTRY(nvme_probe_skip_entry)      tailq;
     147             : };
     148             : /* All the controllers deleted by users via RPC are skipped by hotplug monitor */
     149             : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
     150             :                         g_skipped_nvme_ctrlrs);
     151             : 
     152             : #define BDEV_NVME_DEFAULT_DIGESTS (SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256) | \
     153             :                                    SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA384) | \
     154             :                                    SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512))
     155             : 
     156             : #define BDEV_NVME_DEFAULT_DHGROUPS (SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL) | \
     157             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_2048) | \
     158             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_3072) | \
     159             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_4096) | \
     160             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_6144) | \
     161             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192))
     162             : 
     163             : static struct spdk_bdev_nvme_opts g_opts = {
     164             :         .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
     165             :         .timeout_us = 0,
     166             :         .timeout_admin_us = 0,
     167             :         .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
     168             :         .transport_retry_count = 4,
     169             :         .arbitration_burst = 0,
     170             :         .low_priority_weight = 0,
     171             :         .medium_priority_weight = 0,
     172             :         .high_priority_weight = 0,
     173             :         .nvme_adminq_poll_period_us = 10000ULL,
     174             :         .nvme_ioq_poll_period_us = 0,
     175             :         .io_queue_requests = 0,
     176             :         .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
     177             :         .bdev_retry_count = 3,
     178             :         .transport_ack_timeout = 0,
     179             :         .ctrlr_loss_timeout_sec = 0,
     180             :         .reconnect_delay_sec = 0,
     181             :         .fast_io_fail_timeout_sec = 0,
     182             :         .disable_auto_failback = false,
     183             :         .generate_uuids = false,
     184             :         .transport_tos = 0,
     185             :         .nvme_error_stat = false,
     186             :         .io_path_stat = false,
     187             :         .allow_accel_sequence = false,
     188             :         .dhchap_digests = BDEV_NVME_DEFAULT_DIGESTS,
     189             :         .dhchap_dhgroups = BDEV_NVME_DEFAULT_DHGROUPS,
     190             : };
     191             : 
     192             : #define NVME_HOTPLUG_POLL_PERIOD_MAX                    10000000ULL
     193             : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT                100000ULL
     194             : 
     195             : static int g_hot_insert_nvme_controller_index = 0;
     196             : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
     197             : static bool g_nvme_hotplug_enabled = false;
     198             : struct spdk_thread *g_bdev_nvme_init_thread;
     199             : static struct spdk_poller *g_hotplug_poller;
     200             : static struct spdk_poller *g_hotplug_probe_poller;
     201             : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
     202             : 
     203             : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
     204             :                 struct nvme_async_probe_ctx *ctx);
     205             : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
     206             :                 struct nvme_async_probe_ctx *ctx);
     207             : static int bdev_nvme_library_init(void);
     208             : static void bdev_nvme_library_fini(void);
     209             : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
     210             :                                       struct spdk_bdev_io *bdev_io);
     211             : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
     212             :                                      struct spdk_bdev_io *bdev_io);
     213             : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     214             :                            void *md, uint64_t lba_count, uint64_t lba,
     215             :                            uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     216             :                            struct spdk_accel_sequence *seq);
     217             : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     218             :                                  void *md, uint64_t lba_count, uint64_t lba);
     219             : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     220             :                             void *md, uint64_t lba_count, uint64_t lba,
     221             :                             uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     222             :                             struct spdk_accel_sequence *seq,
     223             :                             union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13);
     224             : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     225             :                                   void *md, uint64_t lba_count,
     226             :                                   uint64_t zslba, uint32_t flags);
     227             : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     228             :                               void *md, uint64_t lba_count, uint64_t lba,
     229             :                               uint32_t flags);
     230             : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
     231             :                 struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
     232             :                 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
     233             :                 uint32_t flags);
     234             : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
     235             :                                    uint32_t num_zones, struct spdk_bdev_zone_info *info);
     236             : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
     237             :                                      enum spdk_bdev_zone_action action);
     238             : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
     239             :                                      struct nvme_bdev_io *bio,
     240             :                                      struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
     241             : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     242             :                                  void *buf, size_t nbytes);
     243             : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     244             :                                     void *buf, size_t nbytes, void *md_buf, size_t md_len);
     245             : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     246             :                                      struct iovec *iov, int iovcnt, size_t nbytes,
     247             :                                      void *md_buf, size_t md_len);
     248             : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
     249             :                             struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
     250             : static void bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio);
     251             : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     252             : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     253             : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
     254             : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
     255             : 
     256             : static struct nvme_ns *nvme_ns_alloc(void);
     257             : static void nvme_ns_free(struct nvme_ns *ns);
     258             : 
     259             : static int
     260         175 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
     261             : {
     262         175 :         return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
     263             : }
     264             : 
     265        1070 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
     266             : 
     267             : struct spdk_nvme_qpair *
     268           1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
     269             : {
     270             :         struct nvme_ctrlr_channel *ctrlr_ch;
     271             : 
     272           1 :         assert(ctrlr_io_ch != NULL);
     273             : 
     274           1 :         ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
     275             : 
     276           1 :         return ctrlr_ch->qpair->qpair;
     277             : }
     278             : 
     279             : static int
     280           0 : bdev_nvme_get_ctx_size(void)
     281             : {
     282           0 :         return sizeof(struct nvme_bdev_io);
     283             : }
     284             : 
     285             : static struct spdk_bdev_module nvme_if = {
     286             :         .name = "nvme",
     287             :         .async_fini = true,
     288             :         .module_init = bdev_nvme_library_init,
     289             :         .module_fini = bdev_nvme_library_fini,
     290             :         .config_json = bdev_nvme_config_json,
     291             :         .get_ctx_size = bdev_nvme_get_ctx_size,
     292             : 
     293             : };
     294           1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
     295             : 
     296             : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
     297             : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
     298             : bool g_bdev_nvme_module_finish;
     299             : 
     300             : struct nvme_bdev_ctrlr *
     301         327 : nvme_bdev_ctrlr_get_by_name(const char *name)
     302             : {
     303             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     304             : 
     305         327 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     306         169 :                 if (strcmp(name, nbdev_ctrlr->name) == 0) {
     307         169 :                         break;
     308             :                 }
     309           0 :         }
     310             : 
     311         327 :         return nbdev_ctrlr;
     312             : }
     313             : 
     314             : static struct nvme_ctrlr *
     315          58 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     316             :                           const struct spdk_nvme_transport_id *trid, const char *hostnqn)
     317             : {
     318             :         const struct spdk_nvme_ctrlr_opts *opts;
     319             :         struct nvme_ctrlr *nvme_ctrlr;
     320             : 
     321          99 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     322          74 :                 opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
     323          74 :                 if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0 &&
     324          33 :                     strcmp(hostnqn, opts->hostnqn) == 0) {
     325          33 :                         break;
     326             :                 }
     327          41 :         }
     328             : 
     329          58 :         return nvme_ctrlr;
     330             : }
     331             : 
     332             : struct nvme_ctrlr *
     333           0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     334             :                                 uint16_t cntlid)
     335             : {
     336             :         struct nvme_ctrlr *nvme_ctrlr;
     337             :         const struct spdk_nvme_ctrlr_data *cdata;
     338             : 
     339           0 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     340           0 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
     341           0 :                 if (cdata->cntlid == cntlid) {
     342           0 :                         break;
     343             :                 }
     344           0 :         }
     345             : 
     346           0 :         return nvme_ctrlr;
     347             : }
     348             : 
     349             : static struct nvme_bdev *
     350          73 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
     351             : {
     352             :         struct nvme_bdev *bdev;
     353             : 
     354          73 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     355         107 :         TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
     356          68 :                 if (bdev->nsid == nsid) {
     357          34 :                         break;
     358             :                 }
     359          34 :         }
     360          73 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     361             : 
     362          73 :         return bdev;
     363             : }
     364             : 
     365             : struct nvme_ns *
     366         143 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
     367             : {
     368             :         struct nvme_ns ns;
     369             : 
     370         143 :         assert(nsid > 0);
     371             : 
     372         143 :         ns.id = nsid;
     373         143 :         return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
     374             : }
     375             : 
     376             : struct nvme_ns *
     377         162 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
     378             : {
     379         162 :         return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
     380             : }
     381             : 
     382             : struct nvme_ns *
     383          72 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
     384             : {
     385          72 :         if (ns == NULL) {
     386           0 :                 return NULL;
     387             :         }
     388             : 
     389          72 :         return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     390          72 : }
     391             : 
     392             : static struct nvme_ctrlr *
     393          52 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
     394             : {
     395             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
     396          52 :         struct nvme_ctrlr       *nvme_ctrlr = NULL;
     397             : 
     398          52 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     399          71 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     400          19 :                 nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid, hostnqn);
     401          19 :                 if (nvme_ctrlr != NULL) {
     402           0 :                         break;
     403             :                 }
     404          19 :         }
     405          52 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     406             : 
     407          52 :         return nvme_ctrlr;
     408             : }
     409             : 
     410             : struct nvme_ctrlr *
     411         125 : nvme_ctrlr_get_by_name(const char *name)
     412             : {
     413             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     414         125 :         struct nvme_ctrlr *nvme_ctrlr = NULL;
     415             : 
     416         125 :         if (name == NULL) {
     417           0 :                 return NULL;
     418             :         }
     419             : 
     420         125 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     421         125 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
     422         125 :         if (nbdev_ctrlr != NULL) {
     423          60 :                 nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
     424          60 :         }
     425         125 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     426             : 
     427         125 :         return nvme_ctrlr;
     428         125 : }
     429             : 
     430             : void
     431           0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
     432             : {
     433             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     434             : 
     435           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     436           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     437           0 :                 fn(nbdev_ctrlr, ctx);
     438           0 :         }
     439           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     440           0 : }
     441             : 
     442             : struct nvme_ctrlr_channel_iter {
     443             :         nvme_ctrlr_for_each_channel_msg fn;
     444             :         nvme_ctrlr_for_each_channel_done cpl;
     445             :         struct spdk_io_channel_iter *i;
     446             :         void *ctx;
     447             : };
     448             : 
     449             : void
     450         284 : nvme_ctrlr_for_each_channel_continue(struct nvme_ctrlr_channel_iter *iter, int status)
     451             : {
     452         284 :         spdk_for_each_channel_continue(iter->i, status);
     453         284 : }
     454             : 
     455             : static void
     456         284 : nvme_ctrlr_each_channel_msg(struct spdk_io_channel_iter *i)
     457             : {
     458         284 :         struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     459         284 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
     460         284 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
     461         284 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
     462             : 
     463         284 :         iter->i = i;
     464         284 :         iter->fn(iter, nvme_ctrlr, ctrlr_ch, iter->ctx);
     465         284 : }
     466             : 
     467             : static void
     468         165 : nvme_ctrlr_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
     469             : {
     470         165 :         struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     471         165 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
     472             : 
     473         165 :         iter->i = i;
     474         165 :         iter->cpl(nvme_ctrlr, iter->ctx, status);
     475             : 
     476         165 :         free(iter);
     477         165 : }
     478             : 
     479             : void
     480         165 : nvme_ctrlr_for_each_channel(struct nvme_ctrlr *nvme_ctrlr,
     481             :                             nvme_ctrlr_for_each_channel_msg fn, void *ctx,
     482             :                             nvme_ctrlr_for_each_channel_done cpl)
     483             : {
     484             :         struct nvme_ctrlr_channel_iter *iter;
     485             : 
     486         165 :         assert(nvme_ctrlr != NULL && fn != NULL);
     487             : 
     488         165 :         iter = calloc(1, sizeof(struct nvme_ctrlr_channel_iter));
     489         165 :         if (iter == NULL) {
     490           0 :                 SPDK_ERRLOG("Unable to allocate iterator\n");
     491           0 :                 assert(false);
     492             :                 return;
     493             :         }
     494             : 
     495         165 :         iter->fn = fn;
     496         165 :         iter->cpl = cpl;
     497         165 :         iter->ctx = ctx;
     498             : 
     499         330 :         spdk_for_each_channel(nvme_ctrlr, nvme_ctrlr_each_channel_msg,
     500         165 :                               iter, nvme_ctrlr_each_channel_cpl);
     501         165 : }
     502             : 
     503             : struct nvme_bdev_channel_iter {
     504             :         nvme_bdev_for_each_channel_msg fn;
     505             :         nvme_bdev_for_each_channel_done cpl;
     506             :         struct spdk_io_channel_iter *i;
     507             :         void *ctx;
     508             : };
     509             : 
     510             : void
     511          67 : nvme_bdev_for_each_channel_continue(struct nvme_bdev_channel_iter *iter, int status)
     512             : {
     513          67 :         spdk_for_each_channel_continue(iter->i, status);
     514          67 : }
     515             : 
     516             : static void
     517          67 : nvme_bdev_each_channel_msg(struct spdk_io_channel_iter *i)
     518             : {
     519          67 :         struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     520          67 :         struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
     521          67 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
     522          67 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
     523             : 
     524          67 :         iter->i = i;
     525          67 :         iter->fn(iter, nbdev, nbdev_ch, iter->ctx);
     526          67 : }
     527             : 
     528             : static void
     529          59 : nvme_bdev_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
     530             : {
     531          59 :         struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     532          59 :         struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
     533             : 
     534          59 :         iter->i = i;
     535          59 :         iter->cpl(nbdev, iter->ctx, status);
     536             : 
     537          59 :         free(iter);
     538          59 : }
     539             : 
     540             : void
     541          59 : nvme_bdev_for_each_channel(struct nvme_bdev *nbdev,
     542             :                            nvme_bdev_for_each_channel_msg fn, void *ctx,
     543             :                            nvme_bdev_for_each_channel_done cpl)
     544             : {
     545             :         struct nvme_bdev_channel_iter *iter;
     546             : 
     547          59 :         assert(nbdev != NULL && fn != NULL);
     548             : 
     549          59 :         iter = calloc(1, sizeof(struct nvme_bdev_channel_iter));
     550          59 :         if (iter == NULL) {
     551           0 :                 SPDK_ERRLOG("Unable to allocate iterator\n");
     552           0 :                 assert(false);
     553             :                 return;
     554             :         }
     555             : 
     556          59 :         iter->fn = fn;
     557          59 :         iter->cpl = cpl;
     558          59 :         iter->ctx = ctx;
     559             : 
     560          59 :         spdk_for_each_channel(nbdev, nvme_bdev_each_channel_msg, iter,
     561             :                               nvme_bdev_each_channel_cpl);
     562          59 : }
     563             : 
     564             : void
     565           0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
     566             : {
     567             :         const char *trtype_str;
     568             :         const char *adrfam_str;
     569             : 
     570           0 :         trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
     571           0 :         if (trtype_str) {
     572           0 :                 spdk_json_write_named_string(w, "trtype", trtype_str);
     573           0 :         }
     574             : 
     575           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
     576           0 :         if (adrfam_str) {
     577           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
     578           0 :         }
     579             : 
     580           0 :         if (trid->traddr[0] != '\0') {
     581           0 :                 spdk_json_write_named_string(w, "traddr", trid->traddr);
     582           0 :         }
     583             : 
     584           0 :         if (trid->trsvcid[0] != '\0') {
     585           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
     586           0 :         }
     587             : 
     588           0 :         if (trid->subnqn[0] != '\0') {
     589           0 :                 spdk_json_write_named_string(w, "subnqn", trid->subnqn);
     590           0 :         }
     591           0 : }
     592             : 
     593             : static void
     594          60 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     595             :                        struct nvme_ctrlr *nvme_ctrlr)
     596             : {
     597             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
     598          60 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     599             : 
     600          60 :         TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
     601          60 :         if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
     602          15 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
     603             : 
     604          15 :                 return;
     605             :         }
     606          45 :         TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
     607             : 
     608          45 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     609             : 
     610          45 :         assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));
     611             : 
     612          45 :         free(nbdev_ctrlr->name);
     613          45 :         free(nbdev_ctrlr);
     614          60 : }
     615             : 
     616             : static void
     617          61 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     618             : {
     619             :         struct nvme_path_id *path_id, *tmp_path;
     620             :         struct nvme_ns *ns, *tmp_ns;
     621             : 
     622          61 :         free(nvme_ctrlr->copied_ana_desc);
     623          61 :         spdk_free(nvme_ctrlr->ana_log_page);
     624             : 
     625          61 :         if (nvme_ctrlr->opal_dev) {
     626           0 :                 spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
     627           0 :                 nvme_ctrlr->opal_dev = NULL;
     628           0 :         }
     629             : 
     630          61 :         if (nvme_ctrlr->nbdev_ctrlr) {
     631          60 :                 nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
     632          60 :         }
     633             : 
     634          61 :         RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
     635           0 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     636           0 :                 nvme_ns_free(ns);
     637           0 :         }
     638             : 
     639         122 :         TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
     640          61 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
     641          61 :                 free(path_id);
     642          61 :         }
     643             : 
     644          61 :         pthread_mutex_destroy(&nvme_ctrlr->mutex);
     645          61 :         spdk_keyring_put_key(nvme_ctrlr->psk);
     646          61 :         spdk_keyring_put_key(nvme_ctrlr->dhchap_key);
     647          61 :         spdk_keyring_put_key(nvme_ctrlr->dhchap_ctrlr_key);
     648          61 :         free(nvme_ctrlr);
     649             : 
     650          61 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     651          61 :         if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
     652           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
     653           0 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
     654           0 :                 spdk_bdev_module_fini_done();
     655           0 :                 return;
     656             :         }
     657          61 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     658          61 : }
     659             : 
     660             : static int
     661          61 : nvme_detach_poller(void *arg)
     662             : {
     663          61 :         struct nvme_ctrlr *nvme_ctrlr = arg;
     664             :         int rc;
     665             : 
     666          61 :         rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
     667          61 :         if (rc != -EAGAIN) {
     668          61 :                 spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     669          61 :                 _nvme_ctrlr_delete(nvme_ctrlr);
     670          61 :         }
     671             : 
     672          61 :         return SPDK_POLLER_BUSY;
     673             : }
     674             : 
     675             : static void
     676          61 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     677             : {
     678             :         int rc;
     679             : 
     680          61 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
     681             : 
     682             :         /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
     683          61 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
     684             : 
     685             :         /* If we got here, the reset/detach poller cannot be active */
     686          61 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
     687          61 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
     688             :                                           nvme_ctrlr, 1000);
     689          61 :         if (nvme_ctrlr->reset_detach_poller == NULL) {
     690           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to register detach poller\n");
     691           0 :                 goto error;
     692             :         }
     693             : 
     694          61 :         rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
     695          61 :         if (rc != 0) {
     696           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to detach the NVMe controller\n");
     697           0 :                 goto error;
     698             :         }
     699             : 
     700          61 :         return;
     701             : error:
     702             :         /* We don't have a good way to handle errors here, so just do what we can and delete the
     703             :          * controller without detaching the underlying NVMe device.
     704             :          */
     705           0 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     706           0 :         _nvme_ctrlr_delete(nvme_ctrlr);
     707          61 : }
     708             : 
     709             : static void
     710          60 : nvme_ctrlr_unregister_cb(void *io_device)
     711             : {
     712          60 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
     713             : 
     714          60 :         nvme_ctrlr_delete(nvme_ctrlr);
     715          60 : }
     716             : 
     717             : static void
     718          60 : nvme_ctrlr_unregister(void *ctx)
     719             : {
     720          60 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
     721             : 
     722          60 :         spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
     723          60 : }
     724             : 
     725             : static bool
     726         244 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
     727             : {
     728         244 :         if (!nvme_ctrlr->destruct) {
     729         128 :                 return false;
     730             :         }
     731             : 
     732         116 :         if (nvme_ctrlr->ref > 0) {
     733          56 :                 return false;
     734             :         }
     735             : 
     736          60 :         if (nvme_ctrlr->resetting) {
     737           0 :                 return false;
     738             :         }
     739             : 
     740          60 :         if (nvme_ctrlr->ana_log_page_updating) {
     741           0 :                 return false;
     742             :         }
     743             : 
     744          60 :         if (nvme_ctrlr->io_path_cache_clearing) {
     745           0 :                 return false;
     746             :         }
     747             : 
     748          60 :         return true;
     749         244 : }
     750             : 
     751             : static void
     752         168 : nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
     753             : {
     754         168 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
     755             :         SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
     756             : 
     757         168 :         assert(nvme_ctrlr->ref > 0);
     758         168 :         nvme_ctrlr->ref--;
     759             : 
     760         168 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
     761         108 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
     762         108 :                 return;
     763             :         }
     764             : 
     765          60 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
     766             : 
     767          60 :         spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
     768         168 : }
     769             : 
     770             : static void
     771         251 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
     772             : {
     773         251 :         nbdev_ch->current_io_path = NULL;
     774         251 :         nbdev_ch->rr_counter = 0;
     775         251 : }
     776             : 
     777             : static struct nvme_io_path *
     778           8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     779             : {
     780             :         struct nvme_io_path *io_path;
     781             : 
     782          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
     783          15 :                 if (io_path->nvme_ns == nvme_ns) {
     784           7 :                         break;
     785             :                 }
     786           8 :         }
     787             : 
     788           8 :         return io_path;
     789             : }
     790             : 
     791             : static struct nvme_io_path *
     792          37 : nvme_io_path_alloc(void)
     793             : {
     794             :         struct nvme_io_path *io_path;
     795             : 
     796          37 :         io_path = calloc(1, sizeof(*io_path));
     797          37 :         if (io_path == NULL) {
     798           0 :                 SPDK_ERRLOG("Failed to alloc io_path.\n");
     799           0 :                 return NULL;
     800             :         }
     801             : 
     802          37 :         if (g_opts.io_path_stat) {
     803           0 :                 io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
     804           0 :                 if (io_path->stat == NULL) {
     805           0 :                         free(io_path);
     806           0 :                         SPDK_ERRLOG("Failed to alloc io_path stat.\n");
     807           0 :                         return NULL;
     808             :                 }
     809           0 :                 spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
     810           0 :         }
     811             : 
     812          37 :         return io_path;
     813          37 : }
     814             : 
     815             : static void
     816          37 : nvme_io_path_free(struct nvme_io_path *io_path)
     817             : {
     818          37 :         free(io_path->stat);
     819          37 :         free(io_path);
     820          37 : }
     821             : 
     822             : static int
     823          37 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     824             : {
     825             :         struct nvme_io_path *io_path;
     826             :         struct spdk_io_channel *ch;
     827             :         struct nvme_ctrlr_channel *ctrlr_ch;
     828             :         struct nvme_qpair *nvme_qpair;
     829             : 
     830          37 :         io_path = nvme_io_path_alloc();
     831          37 :         if (io_path == NULL) {
     832           0 :                 return -ENOMEM;
     833             :         }
     834             : 
     835          37 :         io_path->nvme_ns = nvme_ns;
     836             : 
     837          37 :         ch = spdk_get_io_channel(nvme_ns->ctrlr);
     838          37 :         if (ch == NULL) {
     839           0 :                 nvme_io_path_free(io_path);
     840           0 :                 SPDK_ERRLOG("Failed to alloc io_channel.\n");
     841           0 :                 return -ENOMEM;
     842             :         }
     843             : 
     844          37 :         ctrlr_ch = spdk_io_channel_get_ctx(ch);
     845             : 
     846          37 :         nvme_qpair = ctrlr_ch->qpair;
     847          37 :         assert(nvme_qpair != NULL);
     848             : 
     849          37 :         io_path->qpair = nvme_qpair;
     850          37 :         TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);
     851             : 
     852          37 :         io_path->nbdev_ch = nbdev_ch;
     853          37 :         STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);
     854             : 
     855          37 :         bdev_nvme_clear_current_io_path(nbdev_ch);
     856             : 
     857          37 :         return 0;
     858          37 : }
     859             : 
     860             : static void
     861          37 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
     862             :                               struct nvme_io_path *io_path)
     863             : {
     864             :         struct nvme_bdev_io *bio;
     865             : 
     866          38 :         TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
     867           1 :                 if (bio->io_path == io_path) {
     868           1 :                         bio->io_path = NULL;
     869           1 :                 }
     870           1 :         }
     871          37 : }
     872             : 
     873             : static void
     874          37 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
     875             : {
     876             :         struct spdk_io_channel *ch;
     877             :         struct nvme_qpair *nvme_qpair;
     878             :         struct nvme_ctrlr_channel *ctrlr_ch;
     879             :         struct nvme_bdev *nbdev;
     880             : 
     881          37 :         nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));
     882             : 
     883             :         /* Add the statistics to nvme_ns before this path is destroyed. */
     884          37 :         pthread_mutex_lock(&nbdev->mutex);
     885          37 :         if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
     886           0 :                 spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
     887           0 :         }
     888          37 :         pthread_mutex_unlock(&nbdev->mutex);
     889             : 
     890          37 :         bdev_nvme_clear_current_io_path(nbdev_ch);
     891          37 :         bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);
     892             : 
     893          39 :         STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
     894          37 :         io_path->nbdev_ch = NULL;
     895             : 
     896          37 :         nvme_qpair = io_path->qpair;
     897          37 :         assert(nvme_qpair != NULL);
     898             : 
     899          37 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
     900          37 :         assert(ctrlr_ch != NULL);
     901             : 
     902          37 :         ch = spdk_io_channel_from_ctx(ctrlr_ch);
     903          37 :         spdk_put_io_channel(ch);
     904             : 
     905             :         /* After an io_path is removed, I/Os submitted to it may complete and update statistics
     906             :          * of the io_path. To avoid heap-use-after-free error from this case, do not free the
     907             :          * io_path here but free the io_path when the associated qpair is freed. It is ensured
     908             :          * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
     909             :          */
     910          37 : }
     911             : 
     912             : static void
     913          24 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
     914             : {
     915             :         struct nvme_io_path *io_path, *tmp_io_path;
     916             : 
     917          59 :         STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
     918          35 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
     919          35 :         }
     920          24 : }
     921             : 
     922             : static int
     923          24 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
     924             : {
     925          24 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf;
     926          24 :         struct nvme_bdev *nbdev = io_device;
     927             :         struct nvme_ns *nvme_ns;
     928             :         int rc;
     929             : 
     930          24 :         STAILQ_INIT(&nbdev_ch->io_path_list);
     931          24 :         TAILQ_INIT(&nbdev_ch->retry_io_list);
     932             : 
     933          24 :         pthread_mutex_lock(&nbdev->mutex);
     934             : 
     935          24 :         nbdev_ch->mp_policy = nbdev->mp_policy;
     936          24 :         nbdev_ch->mp_selector = nbdev->mp_selector;
     937          24 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
     938             : 
     939          59 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
     940          35 :                 rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
     941          35 :                 if (rc != 0) {
     942           0 :                         pthread_mutex_unlock(&nbdev->mutex);
     943             : 
     944           0 :                         _bdev_nvme_delete_io_paths(nbdev_ch);
     945           0 :                         return rc;
     946             :                 }
     947          35 :         }
     948          24 :         pthread_mutex_unlock(&nbdev->mutex);
     949             : 
     950          24 :         return 0;
     951          24 : }
     952             : 
     953             : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
     954             :  * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
     955             :  */
     956             : static inline void
     957          57 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
     958             :                         const struct spdk_nvme_cpl *cpl)
     959             : {
     960          57 :         spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
     961             :                           (uintptr_t)bdev_io);
     962          57 :         if (cpl) {
     963          29 :                 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
     964          29 :         } else {
     965          28 :                 spdk_bdev_io_complete(bdev_io, status);
     966             :         }
     967          57 : }
     968             : 
     969             : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
     970             : 
     971             : static void
     972          24 : bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
     973             : {
     974          24 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf;
     975             : 
     976          24 :         bdev_nvme_abort_retry_ios(nbdev_ch);
     977          24 :         _bdev_nvme_delete_io_paths(nbdev_ch);
     978          24 : }
     979             : 
     980             : static inline bool
     981          62 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
     982             : {
     983          62 :         switch (io_type) {
     984             :         case SPDK_BDEV_IO_TYPE_RESET:
     985             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
     986             :         case SPDK_BDEV_IO_TYPE_ABORT:
     987           5 :                 return true;
     988             :         default:
     989          57 :                 break;
     990             :         }
     991             : 
     992          57 :         return false;
     993          62 : }
     994             : 
     995             : static inline bool
     996          98 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
     997             : {
     998          98 :         if (spdk_unlikely(nvme_ns->ana_state_updating)) {
     999           1 :                 return false;
    1000             :         }
    1001             : 
    1002          97 :         if (spdk_unlikely(nvme_ns->ns == NULL)) {
    1003           0 :                 return false;
    1004             :         }
    1005             : 
    1006          97 :         return true;
    1007          98 : }
    1008             : 
    1009             : static inline bool
    1010          86 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
    1011             : {
    1012          86 :         if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
    1013           1 :                 return false;
    1014             :         }
    1015             : 
    1016          85 :         switch (nvme_ns->ana_state) {
    1017             :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1018             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1019          76 :                 return true;
    1020             :         default:
    1021           9 :                 break;
    1022             :         }
    1023             : 
    1024           9 :         return false;
    1025          86 : }
    1026             : 
    1027             : static inline bool
    1028         128 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
    1029             : {
    1030         128 :         if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
    1031          23 :                 return false;
    1032             :         }
    1033             : 
    1034         105 :         if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
    1035             :                           SPDK_NVME_QPAIR_FAILURE_NONE)) {
    1036           2 :                 return false;
    1037             :         }
    1038             : 
    1039         103 :         if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
    1040           0 :                 return false;
    1041             :         }
    1042             : 
    1043         103 :         return true;
    1044         128 : }
    1045             : 
    1046             : static inline bool
    1047         102 : nvme_io_path_is_available(struct nvme_io_path *io_path)
    1048             : {
    1049         102 :         if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
    1050          16 :                 return false;
    1051             :         }
    1052             : 
    1053          86 :         if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
    1054          10 :                 return false;
    1055             :         }
    1056             : 
    1057          76 :         return true;
    1058         102 : }
    1059             : 
    1060             : static inline bool
    1061           9 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
    1062             : {
    1063           9 :         if (nvme_ctrlr->destruct) {
    1064           0 :                 return true;
    1065             :         }
    1066             : 
    1067           9 :         if (nvme_ctrlr->fast_io_fail_timedout) {
    1068           2 :                 return true;
    1069             :         }
    1070             : 
    1071           7 :         if (nvme_ctrlr->resetting) {
    1072           5 :                 if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
    1073           5 :                         return false;
    1074             :                 } else {
    1075           0 :                         return true;
    1076             :                 }
    1077             :         }
    1078             : 
    1079           2 :         if (nvme_ctrlr->reconnect_is_delayed) {
    1080           2 :                 return false;
    1081             :         }
    1082             : 
    1083           0 :         if (nvme_ctrlr->disabled) {
    1084           0 :                 return true;
    1085             :         }
    1086             : 
    1087           0 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    1088           0 :                 return true;
    1089             :         } else {
    1090           0 :                 return false;
    1091             :         }
    1092           9 : }
    1093             : 
    1094             : static bool
    1095          20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
    1096             : {
    1097          20 :         if (nvme_ctrlr->destruct) {
    1098           0 :                 return false;
    1099             :         }
    1100             : 
    1101          20 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    1102           3 :                 return false;
    1103             :         }
    1104             : 
    1105          17 :         if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
    1106           1 :                 return false;
    1107             :         }
    1108             : 
    1109          16 :         if (nvme_ctrlr->disabled) {
    1110           0 :                 return false;
    1111             :         }
    1112             : 
    1113          16 :         return true;
    1114          20 : }
    1115             : 
    1116             : /* Simulate circular linked list. */
    1117             : static inline struct nvme_io_path *
    1118          99 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
    1119             : {
    1120             :         struct nvme_io_path *next_path;
    1121             : 
    1122          99 :         if (prev_path != NULL) {
    1123          39 :                 next_path = STAILQ_NEXT(prev_path, stailq);
    1124          39 :                 if (next_path != NULL) {
    1125          14 :                         return next_path;
    1126             :                 }
    1127          25 :         }
    1128             : 
    1129          85 :         return STAILQ_FIRST(&nbdev_ch->io_path_list);
    1130          99 : }
    1131             : 
    1132             : static struct nvme_io_path *
    1133          67 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
    1134             : {
    1135          67 :         struct nvme_io_path *io_path, *start, *non_optimized = NULL;
    1136             : 
    1137          67 :         start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
    1138             : 
    1139          67 :         io_path = start;
    1140          67 :         do {
    1141          79 :                 if (spdk_likely(nvme_io_path_is_available(io_path))) {
    1142          57 :                         switch (io_path->nvme_ns->ana_state) {
    1143             :                         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1144          47 :                                 nbdev_ch->current_io_path = io_path;
    1145          47 :                                 return io_path;
    1146             :                         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1147          10 :                                 if (non_optimized == NULL) {
    1148           7 :                                         non_optimized = io_path;
    1149           7 :                                 }
    1150          10 :                                 break;
    1151             :                         default:
    1152           0 :                                 assert(false);
    1153             :                                 break;
    1154             :                         }
    1155          10 :                 }
    1156          32 :                 io_path = nvme_io_path_get_next(nbdev_ch, io_path);
    1157          32 :         } while (io_path != start);
    1158             : 
    1159          20 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    1160             :                 /* We come here only if there is no optimized path. Cache even non_optimized
    1161             :                  * path for load balance across multiple non_optimized paths.
    1162             :                  */
    1163           1 :                 nbdev_ch->current_io_path = non_optimized;
    1164           1 :         }
    1165             : 
    1166          20 :         return non_optimized;
    1167          67 : }
    1168             : 
    1169             : static struct nvme_io_path *
    1170           4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
    1171             : {
    1172             :         struct nvme_io_path *io_path;
    1173           4 :         struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
    1174           4 :         uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
    1175             :         uint32_t num_outstanding_reqs;
    1176             : 
    1177          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    1178          12 :                 if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
    1179             :                         /* The device is currently resetting. */
    1180           0 :                         continue;
    1181             :                 }
    1182             : 
    1183          12 :                 if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
    1184           0 :                         continue;
    1185             :                 }
    1186             : 
    1187          12 :                 num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
    1188          12 :                 switch (io_path->nvme_ns->ana_state) {
    1189             :                 case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1190           6 :                         if (num_outstanding_reqs < opt_min_qd) {
    1191           5 :                                 opt_min_qd = num_outstanding_reqs;
    1192           5 :                                 optimized = io_path;
    1193           5 :                         }
    1194           6 :                         break;
    1195             :                 case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1196           3 :                         if (num_outstanding_reqs < non_opt_min_qd) {
    1197           3 :                                 non_opt_min_qd = num_outstanding_reqs;
    1198           3 :                                 non_optimized = io_path;
    1199           3 :                         }
    1200           3 :                         break;
    1201             :                 default:
    1202           3 :                         break;
    1203             :                 }
    1204          12 :         }
    1205             : 
    1206             :         /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
    1207           4 :         if (optimized != NULL) {
    1208           3 :                 return optimized;
    1209             :         }
    1210             : 
    1211           1 :         return non_optimized;
    1212           4 : }
    1213             : 
    1214             : static inline struct nvme_io_path *
    1215         105 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
    1216             : {
    1217         105 :         if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
    1218          41 :                 if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
    1219          31 :                         return nbdev_ch->current_io_path;
    1220          10 :                 } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1221          10 :                         if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
    1222           3 :                                 return nbdev_ch->current_io_path;
    1223             :                         }
    1224           7 :                         nbdev_ch->rr_counter = 0;
    1225           7 :                 }
    1226           7 :         }
    1227             : 
    1228          71 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
    1229          14 :             nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1230          67 :                 return _bdev_nvme_find_io_path(nbdev_ch);
    1231             :         } else {
    1232           4 :                 return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
    1233             :         }
    1234         105 : }
    1235             : 
    1236             : /* Return true if there is any io_path whose qpair is active or ctrlr is not failed,
    1237             :  * or false otherwise.
    1238             :  *
    1239             :  * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
    1240             :  * is likely to be non-accessible now but may become accessible.
    1241             :  *
    1242             :  * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
    1243             :  * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
    1244             :  * when starting to reset it but it is set to failed when the reset failed. Hence, if
    1245             :  * a ctrlr is unfailed, it is likely that it works fine or is resetting.
    1246             :  */
    1247             : static bool
    1248          15 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
    1249             : {
    1250             :         struct nvme_io_path *io_path;
    1251             : 
    1252          15 :         if (nbdev_ch->resetting) {
    1253           1 :                 return false;
    1254             :         }
    1255             : 
    1256          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    1257          14 :                 if (io_path->nvme_ns->ana_transition_timedout) {
    1258           0 :                         continue;
    1259             :                 }
    1260             : 
    1261          14 :                 if (nvme_qpair_is_connected(io_path->qpair) ||
    1262           9 :                     !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
    1263          12 :                         return true;
    1264             :                 }
    1265           2 :         }
    1266             : 
    1267           2 :         return false;
    1268          15 : }
    1269             : 
    1270             : static void
    1271          14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    1272             : {
    1273          14 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    1274             :         struct spdk_io_channel *ch;
    1275             : 
    1276          14 :         if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
    1277           3 :                 _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    1278           3 :         } else {
    1279          11 :                 ch = spdk_io_channel_from_ctx(nbdev_ch);
    1280          11 :                 bdev_nvme_submit_request(ch, bdev_io);
    1281             :         }
    1282          14 : }
    1283             : 
    1284             : static int
    1285          14 : bdev_nvme_retry_ios(void *arg)
    1286             : {
    1287          14 :         struct nvme_bdev_channel *nbdev_ch = arg;
    1288             :         struct nvme_bdev_io *bio, *tmp_bio;
    1289             :         uint64_t now, delay_us;
    1290             : 
    1291          14 :         now = spdk_get_ticks();
    1292             : 
    1293          28 :         TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
    1294          15 :                 if (bio->retry_ticks > now) {
    1295           1 :                         break;
    1296             :                 }
    1297             : 
    1298          14 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1299             : 
    1300          14 :                 bdev_nvme_retry_io(nbdev_ch, spdk_bdev_io_from_ctx(bio));
    1301          14 :         }
    1302             : 
    1303          14 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1304             : 
    1305          14 :         bio = TAILQ_FIRST(&nbdev_ch->retry_io_list);
    1306          14 :         if (bio != NULL) {
    1307           4 :                 delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
    1308             : 
    1309           4 :                 nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1310             :                                             delay_us);
    1311           4 :         }
    1312             : 
    1313          14 :         return SPDK_POLLER_BUSY;
    1314             : }
    1315             : 
    1316             : static void
    1317          16 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1318             :                          struct nvme_bdev_io *bio, uint64_t delay_ms)
    1319             : {
    1320             :         struct nvme_bdev_io *tmp_bio;
    1321             : 
    1322          16 :         bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
    1323             : 
    1324          16 :         TAILQ_FOREACH_REVERSE(tmp_bio, &nbdev_ch->retry_io_list, retry_io_head, retry_link) {
    1325           1 :                 if (tmp_bio->retry_ticks <= bio->retry_ticks) {
    1326           1 :                         TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bio, bio,
    1327             :                                            retry_link);
    1328           1 :                         return;
    1329             :                 }
    1330           0 :         }
    1331             : 
    1332             :         /* No earlier I/Os were found. This I/O must be the new head. */
    1333          15 :         TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bio, retry_link);
    1334             : 
    1335          15 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1336             : 
    1337          15 :         nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1338             :                                     delay_ms * 1000ULL);
    1339          16 : }
    1340             : 
    1341             : static void
    1342          54 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
    1343             : {
    1344             :         struct nvme_bdev_io *bio, *tmp_bio;
    1345             : 
    1346          55 :         TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
    1347           1 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1348           1 :                 __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1349           1 :         }
    1350             : 
    1351          54 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1352          54 : }
    1353             : 
    1354             : static int
    1355           6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1356             :                          struct nvme_bdev_io *bio_to_abort)
    1357             : {
    1358             :         struct nvme_bdev_io *bio;
    1359             : 
    1360           6 :         TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
    1361           1 :                 if (bio == bio_to_abort) {
    1362           1 :                         TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1363           1 :                         __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1364           1 :                         return 0;
    1365             :                 }
    1366           0 :         }
    1367             : 
    1368           5 :         return -ENOENT;
    1369           6 : }
    1370             : 
    1371             : static void
    1372          12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
    1373             : {
    1374             :         struct nvme_bdev *nbdev;
    1375             :         uint16_t sct, sc;
    1376             : 
    1377          12 :         assert(spdk_nvme_cpl_is_error(cpl));
    1378             : 
    1379          12 :         nbdev = bdev_io->bdev->ctxt;
    1380             : 
    1381          12 :         if (nbdev->err_stat == NULL) {
    1382          12 :                 return;
    1383             :         }
    1384             : 
    1385           0 :         sct = cpl->status.sct;
    1386           0 :         sc = cpl->status.sc;
    1387             : 
    1388           0 :         pthread_mutex_lock(&nbdev->mutex);
    1389             : 
    1390           0 :         nbdev->err_stat->status_type[sct]++;
    1391           0 :         switch (sct) {
    1392             :         case SPDK_NVME_SCT_GENERIC:
    1393             :         case SPDK_NVME_SCT_COMMAND_SPECIFIC:
    1394             :         case SPDK_NVME_SCT_MEDIA_ERROR:
    1395             :         case SPDK_NVME_SCT_PATH:
    1396           0 :                 nbdev->err_stat->status[sct][sc]++;
    1397           0 :                 break;
    1398             :         default:
    1399           0 :                 break;
    1400             :         }
    1401             : 
    1402           0 :         pthread_mutex_unlock(&nbdev->mutex);
    1403          12 : }
    1404             : 
    1405             : static inline void
    1406          20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
    1407             : {
    1408          20 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1409          20 :         uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
    1410          20 :         uint32_t blocklen = bdev_io->bdev->blocklen;
    1411             :         struct spdk_bdev_io_stat *stat;
    1412             :         uint64_t tsc_diff;
    1413             : 
    1414          20 :         if (bio->io_path->stat == NULL) {
    1415          20 :                 return;
    1416             :         }
    1417             : 
    1418           0 :         tsc_diff = spdk_get_ticks() - bio->submit_tsc;
    1419           0 :         stat = bio->io_path->stat;
    1420             : 
    1421           0 :         switch (bdev_io->type) {
    1422             :         case SPDK_BDEV_IO_TYPE_READ:
    1423           0 :                 stat->bytes_read += num_blocks * blocklen;
    1424           0 :                 stat->num_read_ops++;
    1425           0 :                 stat->read_latency_ticks += tsc_diff;
    1426           0 :                 if (stat->max_read_latency_ticks < tsc_diff) {
    1427           0 :                         stat->max_read_latency_ticks = tsc_diff;
    1428           0 :                 }
    1429           0 :                 if (stat->min_read_latency_ticks > tsc_diff) {
    1430           0 :                         stat->min_read_latency_ticks = tsc_diff;
    1431           0 :                 }
    1432           0 :                 break;
    1433             :         case SPDK_BDEV_IO_TYPE_WRITE:
    1434           0 :                 stat->bytes_written += num_blocks * blocklen;
    1435           0 :                 stat->num_write_ops++;
    1436           0 :                 stat->write_latency_ticks += tsc_diff;
    1437           0 :                 if (stat->max_write_latency_ticks < tsc_diff) {
    1438           0 :                         stat->max_write_latency_ticks = tsc_diff;
    1439           0 :                 }
    1440           0 :                 if (stat->min_write_latency_ticks > tsc_diff) {
    1441           0 :                         stat->min_write_latency_ticks = tsc_diff;
    1442           0 :                 }
    1443           0 :                 break;
    1444             :         case SPDK_BDEV_IO_TYPE_UNMAP:
    1445           0 :                 stat->bytes_unmapped += num_blocks * blocklen;
    1446           0 :                 stat->num_unmap_ops++;
    1447           0 :                 stat->unmap_latency_ticks += tsc_diff;
    1448           0 :                 if (stat->max_unmap_latency_ticks < tsc_diff) {
    1449           0 :                         stat->max_unmap_latency_ticks = tsc_diff;
    1450           0 :                 }
    1451           0 :                 if (stat->min_unmap_latency_ticks > tsc_diff) {
    1452           0 :                         stat->min_unmap_latency_ticks = tsc_diff;
    1453           0 :                 }
    1454           0 :                 break;
    1455             :         case SPDK_BDEV_IO_TYPE_ZCOPY:
    1456             :                 /* Track the data in the start phase only */
    1457           0 :                 if (!bdev_io->u.bdev.zcopy.start) {
    1458           0 :                         break;
    1459             :                 }
    1460           0 :                 if (bdev_io->u.bdev.zcopy.populate) {
    1461           0 :                         stat->bytes_read += num_blocks * blocklen;
    1462           0 :                         stat->num_read_ops++;
    1463           0 :                         stat->read_latency_ticks += tsc_diff;
    1464           0 :                         if (stat->max_read_latency_ticks < tsc_diff) {
    1465           0 :                                 stat->max_read_latency_ticks = tsc_diff;
    1466           0 :                         }
    1467           0 :                         if (stat->min_read_latency_ticks > tsc_diff) {
    1468           0 :                                 stat->min_read_latency_ticks = tsc_diff;
    1469           0 :                         }
    1470           0 :                 } else {
    1471           0 :                         stat->bytes_written += num_blocks * blocklen;
    1472           0 :                         stat->num_write_ops++;
    1473           0 :                         stat->write_latency_ticks += tsc_diff;
    1474           0 :                         if (stat->max_write_latency_ticks < tsc_diff) {
    1475           0 :                                 stat->max_write_latency_ticks = tsc_diff;
    1476           0 :                         }
    1477           0 :                         if (stat->min_write_latency_ticks > tsc_diff) {
    1478           0 :                                 stat->min_write_latency_ticks = tsc_diff;
    1479           0 :                         }
    1480             :                 }
    1481           0 :                 break;
    1482             :         case SPDK_BDEV_IO_TYPE_COPY:
    1483           0 :                 stat->bytes_copied += num_blocks * blocklen;
    1484           0 :                 stat->num_copy_ops++;
    1485           0 :                 stat->copy_latency_ticks += tsc_diff;
    1486           0 :                 if (stat->max_copy_latency_ticks < tsc_diff) {
    1487           0 :                         stat->max_copy_latency_ticks = tsc_diff;
    1488           0 :                 }
    1489           0 :                 if (stat->min_copy_latency_ticks > tsc_diff) {
    1490           0 :                         stat->min_copy_latency_ticks = tsc_diff;
    1491           0 :                 }
    1492           0 :                 break;
    1493             :         default:
    1494           0 :                 break;
    1495             :         }
    1496          20 : }
    1497             : 
    1498             : static bool
    1499          11 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
    1500             :                          const struct spdk_nvme_cpl *cpl,
    1501             :                          struct nvme_bdev_channel *nbdev_ch,
    1502             :                          uint64_t *_delay_ms)
    1503             : {
    1504          11 :         struct nvme_io_path *io_path = bio->io_path;
    1505          11 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    1506             :         const struct spdk_nvme_ctrlr_data *cdata;
    1507             : 
    1508          15 :         if (spdk_nvme_cpl_is_path_error(cpl) ||
    1509           5 :             spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
    1510           0 :             !nvme_io_path_is_available(io_path) ||
    1511           4 :             !nvme_ctrlr_is_available(nvme_ctrlr)) {
    1512          15 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    1513          15 :                 bio->io_path = NULL;
    1514          15 :                 if (spdk_nvme_cpl_is_ana_error(cpl)) {
    1515           1 :                         if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
    1516           1 :                                 io_path->nvme_ns->ana_state_updating = true;
    1517           1 :                         }
    1518           1 :                 }
    1519           3 :                 if (!any_io_path_may_become_available(nbdev_ch)) {
    1520           0 :                         return false;
    1521             :                 }
    1522           3 :                 *_delay_ms = 0;
    1523           3 :         } else {
    1524           4 :                 bio->retry_count++;
    1525             : 
    1526           4 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    1527             : 
    1528           4 :                 if (cpl->status.crd != 0) {
    1529           1 :                         *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
    1530           1 :                 } else {
    1531           3 :                         *_delay_ms = 0;
    1532             :                 }
    1533             :         }
    1534             : 
    1535           7 :         return true;
    1536           7 : }
    1537             : 
    1538             : static inline void
    1539          40 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
    1540             :                                   const struct spdk_nvme_cpl *cpl)
    1541             : {
    1542          40 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1543             :         struct nvme_bdev_channel *nbdev_ch;
    1544             :         uint64_t delay_ms;
    1545             : 
    1546          40 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1547             : 
    1548          40 :         if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
    1549          20 :                 bdev_nvme_update_io_path_stat(bio);
    1550          20 :                 goto complete;
    1551             :         }
    1552             : 
    1553             :         /* Update error counts before deciding if retry is needed.
    1554             :          * Hence, error counts may be more than the number of I/O errors.
    1555             :          */
    1556          20 :         bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
    1557             : 
    1558          27 :         if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
    1559           2 :             (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
    1560          23 :                 goto complete;
    1561             :         }
    1562             : 
    1563             :         /* At this point we don't know whether the sequence was successfully executed or not, so we
    1564             :          * cannot retry the IO */
    1565           7 :         if (bdev_io->u.bdev.accel_sequence != NULL) {
    1566           0 :                 goto complete;
    1567             :         }
    1568             : 
    1569           7 :         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1570             : 
    1571           7 :         if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
    1572           7 :                 bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
    1573           7 :                 return;
    1574             :         }
    1575             : 
    1576             : complete:
    1577          25 :         bio->retry_count = 0;
    1578          25 :         bio->submit_tsc = 0;
    1579          25 :         bdev_io->u.bdev.accel_sequence = NULL;
    1580          25 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    1581          32 : }
    1582             : 
    1583             : static inline void
    1584          13 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
    1585             : {
    1586          13 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1587             :         struct nvme_bdev_channel *nbdev_ch;
    1588             :         enum spdk_bdev_io_status io_status;
    1589             : 
    1590          13 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1591             : 
    1592          13 :         switch (rc) {
    1593             :         case 0:
    1594           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1595           1 :                 break;
    1596             :         case -ENOMEM:
    1597           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1598           0 :                 break;
    1599             :         case -ENXIO:
    1600          15 :                 if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
    1601          12 :                         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1602             : 
    1603          12 :                         bdev_nvme_clear_current_io_path(nbdev_ch);
    1604          12 :                         bio->io_path = NULL;
    1605             : 
    1606          12 :                         if (any_io_path_may_become_available(nbdev_ch)) {
    1607           9 :                                 bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
    1608           9 :                                 return;
    1609             :                         }
    1610           3 :                 }
    1611             : 
    1612             :         /* fallthrough */
    1613             :         default:
    1614           3 :                 spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
    1615           3 :                 bdev_io->u.bdev.accel_sequence = NULL;
    1616           3 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1617           3 :                 break;
    1618             :         }
    1619             : 
    1620           4 :         bio->retry_count = 0;
    1621           4 :         bio->submit_tsc = 0;
    1622           4 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1623          13 : }
    1624             : 
    1625             : static inline void
    1626           4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
    1627             : {
    1628           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1629             :         enum spdk_bdev_io_status io_status;
    1630             : 
    1631           4 :         switch (rc) {
    1632             :         case 0:
    1633           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1634           1 :                 break;
    1635             :         case -ENOMEM:
    1636           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1637           0 :                 break;
    1638           1 :         case -ENXIO:
    1639             :         /* fallthrough */
    1640             :         default:
    1641           3 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1642           3 :                 break;
    1643             :         }
    1644             : 
    1645           4 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1646           4 : }
    1647             : 
    1648             : static void
    1649           3 : bdev_nvme_clear_io_path_caches_done(struct nvme_ctrlr *nvme_ctrlr,
    1650             :                                     void *ctx, int status)
    1651             : {
    1652           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1653             : 
    1654           3 :         assert(nvme_ctrlr->io_path_cache_clearing == true);
    1655           3 :         nvme_ctrlr->io_path_cache_clearing = false;
    1656             : 
    1657           3 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    1658           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1659           3 :                 return;
    1660             :         }
    1661             : 
    1662           0 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1663             : 
    1664           0 :         nvme_ctrlr_unregister(nvme_ctrlr);
    1665           3 : }
    1666             : 
    1667             : static void
    1668         408 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
    1669             : {
    1670             :         struct nvme_io_path *io_path;
    1671             : 
    1672         635 :         TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
    1673         227 :                 if (io_path->nbdev_ch == NULL) {
    1674          68 :                         continue;
    1675             :                 }
    1676         159 :                 bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
    1677         159 :         }
    1678         408 : }
    1679             : 
    1680             : static void
    1681           1 : bdev_nvme_clear_io_path_cache(struct nvme_ctrlr_channel_iter *i,
    1682             :                               struct nvme_ctrlr *nvme_ctrlr,
    1683             :                               struct nvme_ctrlr_channel *ctrlr_ch,
    1684             :                               void *ctx)
    1685             : {
    1686           1 :         assert(ctrlr_ch->qpair != NULL);
    1687             : 
    1688           1 :         _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
    1689             : 
    1690           1 :         nvme_ctrlr_for_each_channel_continue(i, 0);
    1691           1 : }
    1692             : /* Start clearing the I/O path cache on every channel of nvme_ctrlr, unless it is unavailable or already clearing. */
    1693             : static void
    1694           3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
    1695             : {
    1696           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1697           3 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    1698           3 :             nvme_ctrlr->io_path_cache_clearing) {
    1699           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1700           0 :                 return;
    1701             :         }
    1702             :         /* The flag is tested and set under the mutex so that only one clearing pass is started at a time. */
    1703           3 :         nvme_ctrlr->io_path_cache_clearing = true;
    1704           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1705             : 
    1706           3 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    1707             :                                     bdev_nvme_clear_io_path_cache,
    1708             :                                     NULL,
    1709             :                                     bdev_nvme_clear_io_path_caches_done);
    1710           3 : }
    1711             : /* Return the nvme_qpair in the group's qpair_list whose qpair equals the given one, or NULL if none does. */
    1712             : static struct nvme_qpair *
    1713         117 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
    1714             : {
    1715             :         struct nvme_qpair *nvme_qpair;
    1716             : 
    1717         134 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1718         134 :                 if (nvme_qpair->qpair == qpair) {
    1719         117 :                         break;
    1720             :                 }
    1721          17 :         }
    1722             :         /* TAILQ_FOREACH leaves nvme_qpair NULL when the list is exhausted without a match. */
    1723         117 :         return nvme_qpair;
    1724             : }
    1725             : 
    1726             : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
    1727             : /* Disconnected-qpair callback of the poll group: free the qpair, then continue a reset, fail over, or delete. */
    1728             : static void
    1729         117 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
    1730             : {
    1731         117 :         struct nvme_poll_group *group = poll_group_ctx;
    1732             :         struct nvme_qpair *nvme_qpair;
    1733             :         struct nvme_ctrlr *nvme_ctrlr;
    1734             :         struct nvme_ctrlr_channel *ctrlr_ch;
    1735             :         int status;
    1736             : 
    1737         117 :         nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
    1738         117 :         if (nvme_qpair == NULL) {
    1739           0 :                 return;
    1740             :         }
    1741             : 
    1742         117 :         if (nvme_qpair->qpair != NULL) {
    1743         117 :                 spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
    1744         117 :                 nvme_qpair->qpair = NULL;
    1745         117 :         }
    1746             :         /* The qpair is gone, so any I/O path cached on it must be dropped. */
    1747         117 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1748             : 
    1749         117 :         nvme_ctrlr = nvme_qpair->ctrlr;
    1750         117 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
    1751             : 
    1752         117 :         if (ctrlr_ch != NULL) {
    1753          72 :                 if (ctrlr_ch->reset_iter != NULL) {
    1754             :                         /* We are in a full reset sequence. */
    1755          67 :                         if (ctrlr_ch->connect_poller != NULL) {
    1756             :                                 /* The qpair failed to connect. Abort the reset sequence. */
    1757           0 :                                 NVME_CTRLR_INFOLOG(nvme_ctrlr,
    1758             :                                                    "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
    1759             :                                                    qpair);
    1760           0 :                                 spdk_poller_unregister(&ctrlr_ch->connect_poller);
    1761           0 :                                 status = -1;
    1762           0 :                         } else {
    1763             :                                 /* The qpair finished disconnecting. Just move to the next ctrlr_channel. */
    1764          67 :                                 NVME_CTRLR_INFOLOG(nvme_ctrlr,
    1765             :                                                    "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
    1766             :                                                    qpair);
    1767          67 :                                 status = 0;
    1768             :                         }
    1769          67 :                         nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, status);
    1770          67 :                         ctrlr_ch->reset_iter = NULL;
    1771          67 :                 } else {
    1772             :                         /* qpair was disconnected unexpectedly. Reset controller for recovery. */
    1773           5 :                         NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpair %p was disconnected and freed. reset controller.\n",
    1774             :                                            qpair);
    1775           5 :                         bdev_nvme_failover_ctrlr(nvme_ctrlr);
    1776             :                 }
    1777          72 :         } else {
    1778             :                 /* In this case, ctrlr_channel is already deleted. */
    1779          45 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpair %p was disconnected and freed. delete nvme_qpair.\n",
    1780             :                                    qpair);
    1781          45 :                 nvme_qpair_delete(nvme_qpair);
    1782             :         }
    1783         117 : }
    1784             : /* Clear the I/O path cache of every qpair in the group that reports a failure reason. */
    1785             : static void
    1786           0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
    1787             : {
    1788             :         struct nvme_qpair *nvme_qpair;
    1789             : 
    1790           0 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1791           0 :                 if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
    1792           0 :                         continue;
    1793             :                 }
    1794             :                 /* Only entries that still have both a qpair and a ctrlr_ch are examined. */
    1795           0 :                 if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
    1796             :                     SPDK_NVME_QPAIR_FAILURE_NONE) {
    1797           0 :                         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1798           0 :                 }
    1799           0 :         }
    1800           0 : }
    1801             : /* Poller: process completions of the poll group; returns SPDK_POLLER_BUSY if any completed, else IDLE. */
    1802             : static int
    1803        1470 : bdev_nvme_poll(void *arg)
    1804             : {
    1805        1470 :         struct nvme_poll_group *group = arg;
    1806             :         int64_t num_completions;
    1807             : 
    1808        1470 :         if (group->collect_spin_stat && group->start_ticks == 0) {
    1809           0 :                 group->start_ticks = spdk_get_ticks();
    1810           0 :         }
    1811             : 
    1812        1470 :         num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
    1813             :                           bdev_nvme_disconnected_qpair_cb);
    1814        1470 :         if (group->collect_spin_stat) {
    1815           0 :                 if (num_completions > 0) {
    1816           0 :                         if (group->end_ticks != 0) {
    1817           0 :                                 group->spin_ticks += (group->end_ticks - group->start_ticks);
    1818           0 :                                 group->end_ticks = 0;
    1819           0 :                         }
    1820           0 :                         group->start_ticks = 0;
    1821           0 :                 } else {
    1822           0 :                         group->end_ticks = spdk_get_ticks();
    1823             :                 }
    1824           0 :         }
    1825             :         /* A negative completion count is treated as an error: look for failed qpairs. */
    1826        1470 :         if (spdk_unlikely(num_completions < 0)) {
    1827           0 :                 bdev_nvme_check_io_qpairs(group);
    1828           0 :         }
    1829             : 
    1830        1470 :         return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
    1831             : }
    1832             : 
    1833             : static int bdev_nvme_poll_adminq(void *arg);
    1834             : /* Replace the adminq poller of nvme_ctrlr with one that runs bdev_nvme_poll_adminq() every new_period_us. */
    1835             : static void
    1836         140 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
    1837             : {
    1838         140 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
    1839             : 
    1840         140 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
    1841             :                                           nvme_ctrlr, new_period_us);
    1842         140 : }
    1843             : /* Adminq poller: process admin completions and react to a disconnected or failed admin queue. */
    1844             : static int
    1845         187 : bdev_nvme_poll_adminq(void *arg)
    1846             : {
    1847             :         int32_t rc;
    1848         187 :         struct nvme_ctrlr *nvme_ctrlr = arg;
    1849             :         nvme_ctrlr_disconnected_cb disconnected_cb;
    1850             : 
    1851         187 :         assert(nvme_ctrlr != NULL);
    1852             : 
    1853         187 :         rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
    1854         187 :         if (rc < 0) {
    1855          85 :                 disconnected_cb = nvme_ctrlr->disconnected_cb;
    1856          85 :                 nvme_ctrlr->disconnected_cb = NULL;
    1857             :                 /* The stored callback is cleared before it runs; without one, start a failover instead. */
    1858          85 :                 if (disconnected_cb != NULL) {
    1859         140 :                         bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
    1860          70 :                                                             g_opts.nvme_adminq_poll_period_us);
    1861          70 :                         disconnected_cb(nvme_ctrlr);
    1862          70 :                 } else {
    1863          15 :                         bdev_nvme_failover_ctrlr(nvme_ctrlr);
    1864             :                 }
    1865         187 :         } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
    1866             :                    SPDK_NVME_QPAIR_FAILURE_NONE) {
    1867           0 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    1868           0 :         }
    1869             : 
    1870         187 :         return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
    1871             : }
    1872             : /* io_device unregister callback: destroy the nvme_bdev's mutex and free its name, err_stat and itself. */
    1873             : static void
    1874          38 : nvme_bdev_free(void *io_device)
    1875             : {
    1876          38 :         struct nvme_bdev *nvme_disk = io_device;
    1877             : 
    1878          38 :         pthread_mutex_destroy(&nvme_disk->mutex);
    1879          38 :         free(nvme_disk->disk.name);
    1880          38 :         free(nvme_disk->err_stat);
    1881          38 :         free(nvme_disk);
    1882          38 : }
    1883             : /* Destruct an nvme_bdev: detach its namespaces, unlink it from its nbdev_ctrlr, unregister the io_device. */
    1884             : static int
    1885          37 : bdev_nvme_destruct(void *ctx)
    1886             : {
    1887          37 :         struct nvme_bdev *nvme_disk = ctx;
    1888             :         struct nvme_ns *nvme_ns, *tmp_nvme_ns;
    1889             : 
    1890             :         SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);
    1891             : 
    1892          75 :         TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
    1893          38 :                 pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
    1894             : 
    1895          38 :                 nvme_ns->bdev = NULL;
    1896             : 
    1897          38 :                 assert(nvme_ns->id > 0);
    1898             :                 /* If the ctrlr no longer has this namespace, release the ctrlr and free the nvme_ns here. */
    1899          38 :                 if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
    1900           0 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1901             : 
    1902           0 :                         nvme_ctrlr_release(nvme_ns->ctrlr);
    1903           0 :                         nvme_ns_free(nvme_ns);
    1904           0 :                 } else {
    1905          38 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1906             :                 }
    1907          38 :         }
    1908             : 
    1909          37 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    1910          37 :         TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
    1911          37 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    1912             :         /* nvme_disk itself is freed later by nvme_bdev_free(), the unregister callback. */
    1913          37 :         spdk_io_device_unregister(nvme_disk, nvme_bdev_free);
    1914             : 
    1915          37 :         return 0;
    1916             : }
    1917             : /* Allocate an I/O qpair, add it to the poll group and start connecting it. Returns 0, or non-zero on failure. */
    1918             : static int
    1919         118 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
    1920             : {
    1921             :         struct nvme_ctrlr *nvme_ctrlr;
    1922             :         struct spdk_nvme_io_qpair_opts opts;
    1923             :         struct spdk_nvme_qpair *qpair;
    1924             :         int rc;
    1925             : 
    1926         118 :         nvme_ctrlr = nvme_qpair->ctrlr;
    1927             :         /* Start from the ctrlr defaults, then override. g_opts.io_queue_requests is updated to the value used. */
    1928         118 :         spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1929         118 :         opts.delay_cmd_submit = g_opts.delay_cmd_submit;
    1930         118 :         opts.create_only = true;
    1931         118 :         opts.async_mode = true;
    1932         118 :         opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
    1933         118 :         g_opts.io_queue_requests = opts.io_queue_requests;
    1934             : 
    1935         118 :         qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1936         118 :         if (qpair == NULL) {
    1937           0 :                 return -1;
    1938             :         }
    1939             : 
    1940             :         SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
    1941             :                            spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
    1942             : 
    1943         118 :         assert(nvme_qpair->group != NULL);
    1944             : 
    1945         118 :         rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
    1946         118 :         if (rc != 0) {
    1947           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to begin polling on NVMe Channel.\n");
    1948           0 :                 goto err;
    1949             :         }
    1950             : 
    1951         118 :         rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
    1952         118 :         if (rc != 0) {
    1953           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to connect I/O qpair.\n");
    1954           0 :                 goto err;
    1955             :         }
    1956             : 
    1957         118 :         nvme_qpair->qpair = qpair;
    1958             : 
    1959         118 :         if (!g_opts.disable_auto_failback) {
    1960          85 :                 _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1961          85 :         }
    1962             : 
    1963         118 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Connecting qpair %p:%u started.\n",
    1964             :                            qpair, spdk_nvme_qpair_get_id(qpair));
    1965             : 
    1966         118 :         return 0;
    1967             :         /* Error path: the allocated qpair is freed and the failing call's rc is returned. */
    1968             : err:
    1969           0 :         spdk_nvme_ctrlr_free_io_qpair(qpair);
    1970             : 
    1971           0 :         return rc;
    1972         118 : }
    1973             : 
    1974             : static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);
    1975             : /* Per-channel callback: continue every pending reset I/O of the channel with rc 0, or -1 if ctx is non-NULL. */
    1976             : static void
    1977         122 : bdev_nvme_complete_pending_resets(struct nvme_ctrlr_channel_iter *i,
    1978             :                                   struct nvme_ctrlr *nvme_ctrlr,
    1979             :                                   struct nvme_ctrlr_channel *ctrlr_ch,
    1980             :                                   void *ctx)
    1981             : {
    1982         122 :         int rc = 0;
    1983             :         struct nvme_bdev_io *bio;
    1984             : 
    1985         122 :         if (ctx != NULL) {
    1986          59 :                 rc = -1;
    1987          59 :         }
    1988             :         /* Each bio is unlinked from pending_resets before it is continued. */
    1989         133 :         while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
    1990          11 :                 bio = TAILQ_FIRST(&ctrlr_ch->pending_resets);
    1991          11 :                 TAILQ_REMOVE(&ctrlr_ch->pending_resets, bio, retry_link);
    1992             : 
    1993          11 :                 bdev_nvme_reset_io_continue(bio, rc);
    1994             :         }
    1995             : 
    1996         122 :         nvme_ctrlr_for_each_channel_continue(i, 0);
    1997         122 : }
    1998             : 
    1999             : /* This function marks the current trid as failed by storing the current ticks
    2000             :  * and then sets the next trid to the active trid within a controller if exists
    2001             :  * (the old trid is freed if "remove" is true, else moved to the tail of the list).
    2002             :  * The purpose of the boolean return value is to request the caller to disconnect
    2003             :  * the current trid now to try connecting the next trid.
    2004             :  */
    2005             : static bool
    2006          61 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
    2007             : {
    2008             :         struct nvme_path_id *path_id, *next_path;
    2009             :         int rc __attribute__((unused));
    2010             : 
    2011          61 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    2012          61 :         assert(path_id);
    2013          61 :         assert(path_id == nvme_ctrlr->active_path_id);
    2014          61 :         next_path = TAILQ_NEXT(path_id, link);
    2015             : 
    2016             :         /* Update the last failed time. It means the trid is failed if its last
    2017             :          * failed time is non-zero.
    2018             :          */
    2019          61 :         path_id->last_failed_tsc = spdk_get_ticks();
    2020             : 
    2021          61 :         if (next_path == NULL) {
    2022             :                 /* There is no alternate trid within a controller. */
    2023          50 :                 return false;
    2024             :         }
    2025             : 
    2026          11 :         if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    2027             :                 /* Connect is not retried in a controller reset sequence. Connecting
    2028             :                  * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
    2029             :                  */
    2030           3 :                 return false;
    2031             :         }
    2032             : 
    2033           8 :         assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
    2034             : 
    2035           8 :         NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Start failover from %s:%s to %s:%s\n",
    2036             :                              path_id->trid.traddr, path_id->trid.trsvcid,
    2037             :                              next_path->trid.traddr, next_path->trid.trsvcid);
    2038             : 
    2039           8 :         spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    2040           8 :         nvme_ctrlr->active_path_id = next_path;
    2041           8 :         rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
    2042           8 :         assert(rc == 0);
    2043           8 :         TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
    2044           8 :         if (!remove) {
    2045             :                 /** Shuffle the old trid to the end of the list and use the new one.
    2046             :                  * Allows for round robin through multiple connections.
    2047             :                  */
    2048           6 :                 TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
    2049           6 :         } else {
    2050           2 :                 free(path_id);
    2051             :         }
    2052             : 
    2053           8 :         if (start || next_path->last_failed_tsc == 0) {
    2054             :                 /* bdev_nvme_failover_ctrlr() is just called or the next trid is not failed
    2055             :                  * or used yet. Try the next trid now.
    2056             :                  */
    2057           7 :                 return true;
    2058             :         }
    2059             : 
    2060           2 :         if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
    2061           1 :             nvme_ctrlr->opts.reconnect_delay_sec) {
    2062             :                 /* Enough backoff passed since the next trid failed. Try the next trid now. */
    2063           0 :                 return true;
    2064             :         }
    2065             : 
    2066             :         /* The next trid will be tried after reconnect_delay_sec seconds. */
    2067           1 :         return false;
    2068          61 : }
    2069             : /* Return true if ctrlr_loss_timeout_sec is neither 0 nor -1 and that many seconds passed since reset_start_tsc. */
    2070             : static bool
    2071          88 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
    2072             : {
    2073             :         int32_t elapsed;
    2074             : 
    2075          88 :         if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
    2076          37 :             nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
    2077          62 :                 return false;
    2078             :         }
    2079             :         /* Convert the ticks elapsed since reset_start_tsc to whole seconds. */
    2080          26 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    2081          26 :         if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
    2082           6 :                 return true;
    2083             :         } else {
    2084          20 :                 return false;
    2085             :         }
    2086          88 : }
    2087             : /* Return true if fast_io_fail_timeout_sec is non-zero and that many seconds passed since reset_start_tsc. */
    2088             : static bool
    2089          12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
    2090             : {
    2091             :         uint32_t elapsed;
    2092             : 
    2093          12 :         if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
    2094           8 :                 return false;
    2095             :         }
    2096             :         /* Convert the ticks elapsed since reset_start_tsc to whole seconds. */
    2097           4 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    2098           4 :         if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
    2099           2 :                 return true;
    2100             :         } else {
    2101           2 :                 return false;
    2102             :         }
    2103          12 : }
    2104             : 
    2105             : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
    2106             : /* Start disconnecting the ctrlr; cb_fn is stored as disconnected_cb to run once the disconnect completes. */
    2107             : static void
    2108          71 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
    2109             : {
    2110             :         int rc;
    2111             : 
    2112          71 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start disconnecting ctrlr.\n");
    2113             : 
    2114          71 :         rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
    2115          71 :         if (rc != 0) {
    2116           1 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr, "disconnecting ctrlr failed.\n");
    2117             : 
    2118             :                 /* Disconnect fails if ctrlr is already resetting or removed. In this case,
    2119             :                  * fail the reset sequence immediately.
    2120             :                  */
    2121           1 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2122           1 :                 return;
    2123             :         }
    2124             : 
    2125             :         /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
    2126             :          * Set callback here to execute the specified operation after ctrlr is really disconnected.
    2127             :          */
    2128          70 :         assert(nvme_ctrlr->disconnected_cb == NULL);
    2129          70 :         nvme_ctrlr->disconnected_cb = cb_fn;
    2130             : 
    2131             :         /* During disconnection, reduce the period to poll adminq more often. */
    2132          70 :         bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
    2133          71 : }
    2134             : /* Follow-up operation chosen by bdev_nvme_check_op_after_reset() when a ctrlr reset completes. */
    2135             : enum bdev_nvme_op_after_reset {
    2136             :         OP_NONE,
    2137             :         OP_COMPLETE_PENDING_DESTRUCT,
    2138             :         OP_DESTRUCT,
    2139             :         OP_DELAYED_RECONNECT,
    2140             :         OP_FAILOVER,
    2141             : };
    2142             : 
    2143             : typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
    2144             : /* Pick the operation to run after a reset; may update pending_failover, reset_start_tsc, fast_io_fail_timedout. */
    2145             : static _bdev_nvme_op_after_reset
    2146          70 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
    2147             : {
    2148          70 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    2149             :                 /* Complete pending destruct after reset completes. */
    2150           0 :                 return OP_COMPLETE_PENDING_DESTRUCT;
    2151          70 :         } else if (nvme_ctrlr->pending_failover) {
    2152           3 :                 nvme_ctrlr->pending_failover = false;
    2153           3 :                 nvme_ctrlr->reset_start_tsc = 0;
    2154           3 :                 return OP_FAILOVER;
    2155          67 :         } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    2156          53 :                 nvme_ctrlr->reset_start_tsc = 0;
    2157          53 :                 return OP_NONE;
    2158          14 :         } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    2159           2 :                 return OP_DESTRUCT;
    2160             :         } else {
    2161          12 :                 if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
    2162           2 :                         nvme_ctrlr->fast_io_fail_timedout = true;
    2163           2 :                 }
    2164          12 :                 return OP_DELAYED_RECONNECT;
    2165             :         }
    2166          70 : }
    2167             : 
    2168             : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
    2169             : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
    2170             : /* Timer poller: once the reconnect delay expires, mark the ctrlr as resetting and start reconnecting. */
    2171             : static int
    2172           9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
    2173             : {
    2174           9 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2175             : 
    2176             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
    2177           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2178             :         /* The timer unregisters itself on its first expiry. */
    2179           9 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2180             : 
    2181           9 :         if (!nvme_ctrlr->reconnect_is_delayed) {
    2182           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2183           0 :                 return SPDK_POLLER_BUSY;
    2184             :         }
    2185             : 
    2186           9 :         nvme_ctrlr->reconnect_is_delayed = false;
    2187             : 
    2188           9 :         if (nvme_ctrlr->destruct) {
    2189           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2190           0 :                 return SPDK_POLLER_BUSY;
    2191             :         }
    2192             : 
    2193           9 :         assert(nvme_ctrlr->resetting == false);
    2194           9 :         nvme_ctrlr->resetting = true;
    2195             : 
    2196           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2197             :         /* Resume the adminq poller before reconnecting. */
    2198           9 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2199             : 
    2200           9 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2201           9 :         return SPDK_POLLER_BUSY;
    2202           9 : }
    2203             : /* Pause the adminq poller and arm the timer that fires after reconnect_delay_sec seconds. */
    2204             : static void
    2205          12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
    2206             : {
    2207          12 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2208             : 
    2209          12 :         assert(nvme_ctrlr->reconnect_is_delayed == false);
    2210          12 :         nvme_ctrlr->reconnect_is_delayed = true;
    2211             : 
    2212          12 :         assert(nvme_ctrlr->reconnect_delay_timer == NULL);
    2213          12 :         nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
    2214             :                                             nvme_ctrlr,
    2215             :                                             nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
    2216          12 : }
    2217             : 
    2218             : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
    2219             : /* Finish a ctrlr reset (ctx == NULL means success): clear reset state, report the result, run the follow-up. */
    2220             : static void
    2221          68 : _bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2222             : {
    2223          68 :         bool success = (ctx == NULL);
    2224          68 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2225          68 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2226             :         enum bdev_nvme_op_after_reset op_after_reset;
    2227             : 
    2228          68 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2229             :         /* The callback was saved in locals above; it is restored below if the next operation is a failover. */
    2230          68 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2231          68 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2232             : 
    2233          68 :         if (!success) {
    2234          33 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Resetting controller failed.\n");
    2235          33 :         } else {
    2236          35 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Resetting controller successful.\n");
    2237             :         }
    2238             : 
    2239          68 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2240          68 :         nvme_ctrlr->resetting = false;
    2241          68 :         nvme_ctrlr->dont_retry = false;
    2242          68 :         nvme_ctrlr->in_failover = false;
    2243             : 
    2244          68 :         op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
    2245          68 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2246             : 
    2247             :         /* Delay callbacks when the next operation is a failover. */
    2248          68 :         if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
    2249          17 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
    2250          17 :         }
    2251             : 
    2252          68 :         switch (op_after_reset) {
    2253             :         case OP_COMPLETE_PENDING_DESTRUCT:
    2254           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2255           0 :                 break;
    2256             :         case OP_DESTRUCT:
    2257           2 :                 bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
    2258           2 :                 remove_discovery_entry(nvme_ctrlr);
    2259           2 :                 break;
    2260             :         case OP_DELAYED_RECONNECT:
    2261          12 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
    2262          12 :                 break;
    2263             :         case OP_FAILOVER:
    2264           3 :                 nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
    2265           3 :                 nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
    2266           3 :                 bdev_nvme_failover_ctrlr(nvme_ctrlr);
    2267           3 :                 break;
    2268             :         default:
    2269          51 :                 break;
    2270             :         }
    2271          68 : }
    2272             : 
    /* Wrap up one connect attempt of a reset sequence.
     *
     * On failure, bdev_nvme_failover_trid() is called under the ctrlr mutex. If it
     * returns true, the ctrlr is handed to nvme_ctrlr_disconnect() with
     * bdev_nvme_reconnect_ctrlr() as the follow-up, and this function returns
     * without completing the reset. On success, last_failed_tsc of the active path
     * is cleared.
     *
     * In every other case bdev_nvme_complete_pending_resets() is run on each ctrlr
     * channel, with ctx NULL for success and (void *)0x1 for failure, and
     * _bdev_nvme_reset_ctrlr_complete() is the completion callback of that pass.
     */
    2273             : static void
    2274          70 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
    2275             : {
    2276          70 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2277          70 :         if (!success) {
    2278             :                 /* Connecting the active trid failed. Set the next alternate trid to the
    2279             :                  * active trid if it exists.
    2280             :                  */
    2281          35 :                 if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
    2282             :                         /* The next alternate trid exists and is ready to try. Try it now. */
    2283           2 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2284             : 
    2285           2 :                         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Try the next alternate trid %s:%s now.\n",
    2286             :                                            nvme_ctrlr->active_path_id->trid.traddr,
    2287             :                                            nvme_ctrlr->active_path_id->trid.trsvcid);
    2288             : 
    2289           2 :                         nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2290           2 :                         return;
    2291             :                 }
    2292             : 
    2293             :                 /* We came here if there is no alternate trid or if the next trid exists but
    2294             :                  * is not ready to try. We will try the active trid after reconnect_delay_sec
    2295             :                  * seconds if it is non-zero or at the next reset call otherwise.
    2296             :                  */
    2297          33 :         } else {
    2298             :                 /* Connecting the active trid succeeded. Clear the last failed time because it
    2299             :                  * means the trid is failed if its last failed time is non-zero.
    2300             :                  */
    2301          35 :                 nvme_ctrlr->active_path_id->last_failed_tsc = 0;
    2302             :         }
    2303          68 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2304             : 
    2305          68 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Clear pending resets.\n");
    2306             : 
    2307             :         /* Make sure we clear any pending resets before returning. */
    2308         136 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2309             :                                     bdev_nvme_complete_pending_resets,
    2310          68 :                                     success ? NULL : (void *)0x1,
    2311             :                                     _bdev_nvme_reset_ctrlr_complete);
    2312          70 : }
    2313             : 
    /* Completion callback of the qpair teardown pass that
     * bdev_nvme_reset_create_qpairs_done() starts when qpair creation failed.
     * Reports the reset as failed. ctx and status are ignored.
     */
    2314             : static void
    2315           0 : bdev_nvme_reset_create_qpairs_failed(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2316             : {
    2317           0 :         bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2318           0 : }
    2319             : 
    /* Per-channel step that tears down the I/O qpair of one ctrlr_channel.
     *
     * The I/O path cache of the channel's nvme_qpair is cleared first. If an
     * spdk_nvme_qpair exists, it is disconnected (with abort DNR enabled when the
     * ctrlr has dont_retry set) and the iterator is parked in ctrlr_ch->reset_iter;
     * this function does not continue the iteration itself in that case. If there
     * is no qpair, the iteration continues immediately with status 0.
     * ctx is unused.
     */
    2320             : static void
    2321         102 : bdev_nvme_reset_destroy_qpair(struct nvme_ctrlr_channel_iter *i,
    2322             :                               struct nvme_ctrlr *nvme_ctrlr,
    2323             :                               struct nvme_ctrlr_channel *ctrlr_ch, void *ctx)
    2324             : {
    2325             :         struct nvme_qpair *nvme_qpair;
    2326             :         struct spdk_nvme_qpair *qpair;
    2327             : 
    2328         102 :         nvme_qpair = ctrlr_ch->qpair;
    2329         102 :         assert(nvme_qpair != NULL);
    2330             : 
    2331         102 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    2332             : 
    2333         102 :         qpair = nvme_qpair->qpair;
    2334         102 :         if (qpair != NULL) {
    2335          67 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start disconnecting qpair %p:%u.\n",
    2336             :                                    qpair, spdk_nvme_qpair_get_id(qpair));
    2337             : 
    2338          67 :                 if (nvme_qpair->ctrlr->dont_retry) {
    2339          53 :                         spdk_nvme_qpair_set_abort_dnr(qpair, true);
    2340          53 :                 }
    2341          67 :                 spdk_nvme_ctrlr_disconnect_io_qpair(qpair);
    2342             : 
    2343             :                 /* The current full reset sequence will move to the next
    2344             :                  * ctrlr_channel after the qpair is actually disconnected.
    2345             :                  */
    2346          67 :                 assert(ctrlr_ch->reset_iter == NULL);
    2347          67 :                 ctrlr_ch->reset_iter = i;
    2348          67 :         } else {
    2349          35 :                 nvme_ctrlr_for_each_channel_continue(i, 0);
    2350             :         }
    2351         102 : }
    2352             : 
    /* Completion callback of the per-channel qpair creation pass.
     *
     * status == 0: the reset is completed as successful.
     * otherwise:   bdev_nvme_reset_destroy_qpair() is run on every channel, with
     *              bdev_nvme_reset_create_qpairs_failed() as the completion
     *              callback that reports the failure.
     * ctx is unused.
     */
    2353             : static void
    2354          35 : bdev_nvme_reset_create_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2355             : {
    2356          35 :         if (status == 0) {
    2357          35 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were created after ctrlr reset.\n");
    2358             : 
    2359          35 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
    2360          35 :         } else {
    2361           0 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were failed to create after ctrlr reset.\n");
    2362             : 
    2363             :                 /* Delete the added qpairs and quiesce ctrlr to make the states clean. */
    2364           0 :                 nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2365             :                                             bdev_nvme_reset_destroy_qpair,
    2366             :                                             NULL,
    2367             :                                             bdev_nvme_reset_create_qpairs_failed);
    2368             :         }
    2369          35 : }
    2370             : 
    /* Poller callback (ctx is the nvme_ctrlr_channel) that waits for the qpair
     * created by bdev_nvme_reset_create_qpair() to become connected.
     *
     * Once spdk_nvme_qpair_is_connected() returns true, connect_poller is
     * unregistered, the iterator parked in ctrlr_ch->reset_iter is continued with
     * status 0 and then cleared, and the I/O path cache is cleared unless
     * g_opts.disable_auto_failback is set.
     *
     * Every path returns SPDK_POLLER_BUSY.
     */
    2371             : static int
    2372          59 : bdev_nvme_reset_check_qpair_connected(void *ctx)
    2373             : {
    2374          59 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx;
    2375          59 :         struct nvme_qpair *nvme_qpair = ctrlr_ch->qpair;
    2376             :         struct spdk_nvme_qpair *qpair;
    2377             : 
    2378          59 :         if (ctrlr_ch->reset_iter == NULL) {
    2379             :                 /* qpair was already failed to connect and the reset sequence is being aborted. */
    2380           0 :                 assert(ctrlr_ch->connect_poller == NULL);
    2381           0 :                 assert(nvme_qpair->qpair == NULL);
    2382             : 
    2383           0 :                 NVME_CTRLR_INFOLOG(nvme_qpair->ctrlr,
    2384             :                                    "qpair was already failed to connect. reset is being aborted.\n");
    2385           0 :                 return SPDK_POLLER_BUSY;
    2386             :         }
    2387             : 
    2388          59 :         qpair = nvme_qpair->qpair;
    2389          59 :         assert(qpair != NULL);
    2390             : 
    2391          59 :         if (!spdk_nvme_qpair_is_connected(qpair)) {
    2392           0 :                 return SPDK_POLLER_BUSY;
    2393             :         }
    2394             : 
    2395          59 :         NVME_CTRLR_INFOLOG(nvme_qpair->ctrlr, "qpair %p:%u was connected.\n",
    2396             :                            qpair, spdk_nvme_qpair_get_id(qpair));
    2397             : 
    2398          59 :         spdk_poller_unregister(&ctrlr_ch->connect_poller);
    2399             : 
    2400             :         /* qpair was completed to connect. Move to the next ctrlr_channel */
    2401          59 :         nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    2402          59 :         ctrlr_ch->reset_iter = NULL;
    2403             : 
    2404          59 :         if (!g_opts.disable_auto_failback) {
    2405          44 :                 _bdev_nvme_clear_io_path_cache(nvme_qpair);
    2406          44 :         }
    2407             : 
    2408          59 :         return SPDK_POLLER_BUSY;
    2409          59 : }
    2410             : 
    /* Per-channel step that re-creates the I/O qpair of one ctrlr_channel.
     *
     * If bdev_nvme_create_qpair() returns 0, a connect poller running
     * bdev_nvme_reset_check_qpair_connected() is registered with period 0, and the
     * iterator is parked in ctrlr_ch->reset_iter for that poller to continue.
     * Otherwise the iteration continues immediately with the error code.
     * ctx is unused.
     */
    2411             : static void
    2412          59 : bdev_nvme_reset_create_qpair(struct nvme_ctrlr_channel_iter *i,
    2413             :                              struct nvme_ctrlr *nvme_ctrlr,
    2414             :                              struct nvme_ctrlr_channel *ctrlr_ch,
    2415             :                              void *ctx)
    2416             : {
    2417          59 :         struct nvme_qpair *nvme_qpair = ctrlr_ch->qpair;
    2418             :         struct spdk_nvme_qpair *qpair;
    2419             :         int rc;
    2420             : 
    2421          59 :         rc = bdev_nvme_create_qpair(nvme_qpair);
    2422          59 :         if (rc == 0) {
    2423          59 :                 ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
    2424             :                                            ctrlr_ch, 0);
    2425             : 
    2426          59 :                 qpair = nvme_qpair->qpair;
    2427             : 
    2428          59 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start checking qpair %p:%u to be connected.\n",
    2429             :                                    qpair, spdk_nvme_qpair_get_id(qpair));
    2430             : 
    2431             :                 /* The current full reset sequence will move to the next
    2432             :                  * ctrlr_channel after the qpair is actually connected.
    2433             :                  */
    2434          59 :                 assert(ctrlr_ch->reset_iter == NULL);
    2435          59 :                 ctrlr_ch->reset_iter = i;
    2436          59 :         } else {
    2437           0 :                 nvme_ctrlr_for_each_channel_continue(i, rc);
    2438             :         }
    2439          59 : }
    2440             : 
    /* Walk the namespaces returned by nvme_ctrlr_get_first_active_ns() /
     * nvme_ctrlr_get_next_active_ns(). For each one that
     * spdk_nvme_ctrlr_is_active_ns() does not report as active, set nvme_ns->ns
     * to NULL. The nvme_ns entry itself is left in place.
     */
    2441             : static void
    2442          35 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    2443             : {
    2444          35 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    2445             :         struct nvme_ns *nvme_ns;
    2446             : 
    2447          55 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    2448          55 :              nvme_ns != NULL;
    2449          20 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    2450          20 :                 if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    2451           1 :                         SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
    2452             :                         /* NS can be added again. Just nullify nvme_ns->ns. */
    2453           1 :                         nvme_ns->ns = NULL;
    2454           1 :                 }
    2455          20 :         }
    2456          35 : }
    2457             : 
    2458             : 
    /* Poller callback (arg is the nvme_ctrlr) that drives an asynchronous
     * reconnect started by bdev_nvme_reconnect_ctrlr().
     *
     * If bdev_nvme_check_ctrlr_loss_timeout() returns true, the ctrlr is marked
     * failed before polling. While spdk_nvme_ctrlr_reconnect_poll_async() returns
     * -EAGAIN, the poller stays registered. Any other result unregisters
     * reset_detach_poller: 0 leads to a namespace check and qpair re-creation on
     * every channel, anything else completes the reset as failed.
     *
     * The initial value of rc is overwritten before it is ever read.
     * Every path returns SPDK_POLLER_BUSY.
     */
    2459             : static int
    2460          69 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
    2461             : {
    2462          69 :         struct nvme_ctrlr *nvme_ctrlr = arg;
    2463             :         struct spdk_nvme_transport_id *trid;
    2464          69 :         int rc = -ETIMEDOUT;
    2465             : 
    2466          69 :         if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    2467             :                 /* Mark the ctrlr as failed. The next call to
    2468             :                  * spdk_nvme_ctrlr_reconnect_poll_async() will then
    2469             :                  * do the necessary cleanup and return failure.
    2470             :                  */
    2471           2 :                 spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    2472           2 :         }
    2473             : 
    2474          69 :         rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
    2475          69 :         if (rc == -EAGAIN) {
    2476           0 :                 return SPDK_POLLER_BUSY;
    2477             :         }
    2478             : 
    2479          69 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
    2480          69 :         if (rc == 0) {
    2481          35 :                 trid = &nvme_ctrlr->active_path_id->trid;
    2482             : 
    2483          35 :                 if (spdk_nvme_trtype_is_fabrics(trid->trtype)) {
    2484          35 :                         NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was connected to %s:%s. Create qpairs.\n",
    2485             :                                            trid->traddr, trid->trsvcid);
    2486          35 :                 } else {
    2487           0 :                         NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was connected. Create qpairs.\n");
    2488             :                 }
    2489             : 
    2490          35 :                 nvme_ctrlr_check_namespaces(nvme_ctrlr);
    2491             : 
    2492             :                 /* Recreate all of the I/O queue pairs */
    2493          35 :                 nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2494             :                                             bdev_nvme_reset_create_qpair,
    2495             :                                             NULL,
    2496             :                                             bdev_nvme_reset_create_qpairs_done);
    2497          35 :         } else {
    2498          34 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr could not be connected.\n");
    2499             : 
    2500          34 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2501             :         }
    2502          69 :         return SPDK_POLLER_BUSY;
    2503          69 : }
    2504             : 
    /* Start an asynchronous reconnect of the ctrlr and register
     * bdev_nvme_reconnect_ctrlr_poll() as reset_detach_poller (period 0) to follow
     * its progress. reset_detach_poller must be NULL on entry (asserted).
     */
    2505             : static void
    2506          69 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2507             : {
    2508          69 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start reconnecting ctrlr.\n");
    2509             : 
    2510          69 :         spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
    2511             : 
    2512             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
    2513          69 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
    2514          69 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
    2515             :                                           nvme_ctrlr, 0);
    2516          69 : }
    2517             : 
    /* Completion callback of the qpair teardown pass of a reset. status is
     * asserted to be 0 and ctx is unused.
     *
     * A non-fabrics ctrlr goes straight to bdev_nvme_reconnect_ctrlr(). A fabrics
     * ctrlr is handed to nvme_ctrlr_disconnect() with bdev_nvme_reconnect_ctrlr()
     * as the follow-up.
     */
    2518             : static void
    2519          56 : bdev_nvme_reset_destroy_qpair_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2520             : {
    2521             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
    2522          56 :         assert(status == 0);
    2523             : 
    2524          56 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were deleted.\n");
    2525             : 
    2526          56 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2527           0 :                 bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2528           0 :         } else {
    2529          56 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2530             :         }
    2531          56 : }
    2532             : 
    /* Run bdev_nvme_reset_destroy_qpair() on every ctrlr channel, with
     * bdev_nvme_reset_destroy_qpair_done() as the completion callback.
     */
    2533             : static void
    2534          56 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2535             : {
    2536          56 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Delete qpairs for reset.\n");
    2537             : 
    2538          56 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2539             :                                     bdev_nvme_reset_destroy_qpair,
    2540             :                                     NULL,
    2541             :                                     bdev_nvme_reset_destroy_qpair_done);
    2542          56 : }
    2543             : 
    /* Message handler (ctx is the nvme_ctrlr) that reconnects without waiting.
     * It unregisters reconnect_delay_timer, resumes adminq_timer_poller and starts
     * the reconnect. Requires resetting == true and the ctrlr thread (asserted).
     *
     * Sent by bdev_nvme_reset_ctrlr() when a delayed reconnect was pending, and by
     * bdev_nvme_enable_ctrlr().
     */
    2544             : static void
    2545           3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
    2546             : {
    2547           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2548             : 
    2549           3 :         assert(nvme_ctrlr->resetting == true);
    2550           3 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2551             : 
    2552           3 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2553             : 
    2554           3 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2555             : 
    2556           3 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2557           3 : }
    2558             : 
    /* Message handler (ctx is the nvme_ctrlr) that starts a full reset.
     * Requires resetting == true and the ctrlr thread (asserted).
     *
     * A non-fabrics ctrlr is handed to nvme_ctrlr_disconnect() with
     * bdev_nvme_reset_destroy_qpairs() as the follow-up. A fabrics ctrlr has its
     * qpairs destroyed directly.
     */
    2559             : static void
    2560          56 : _bdev_nvme_reset_ctrlr(void *ctx)
    2561             : {
    2562          56 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2563             : 
    2564          56 :         assert(nvme_ctrlr->resetting == true);
    2565          56 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2566             : 
    2567          56 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2568           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
    2569           0 :         } else {
    2570          56 :                 bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
    2571             :         }
    2572          56 : }
    2573             : 
    /* Request a reset of the ctrlr.
     *
     * Returns -ENXIO if destruct is set, -EBUSY if resetting is already set,
     * -EALREADY if the ctrlr is disabled, and 0 once the reset has been scheduled.
     *
     * On the success path, resetting and dont_retry are set and reset_start_tsc is
     * recorded under the ctrlr mutex. The work itself is sent to
     * nvme_ctrlr->thread as a message: bdev_nvme_reconnect_ctrlr_now() if
     * reconnect_is_delayed was set (the flag is cleared here), and
     * _bdev_nvme_reset_ctrlr() otherwise.
     */
    2574             : static int
    2575          49 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2576             : {
    2577             :         spdk_msg_fn msg_fn;
    2578             : 
    2579          49 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2580          49 :         if (nvme_ctrlr->destruct) {
    2581           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2582           3 :                 return -ENXIO;
    2583             :         }
    2584             : 
    2585          46 :         if (nvme_ctrlr->resetting) {
    2586          13 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2587          13 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform reset, already in progress.\n");
    2588          13 :                 return -EBUSY;
    2589             :         }
    2590             : 
    2591          33 :         if (nvme_ctrlr->disabled) {
    2592           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2593           1 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform reset. Controller is disabled.\n");
    2594           1 :                 return -EALREADY;
    2595             :         }
    2596             : 
    2597          32 :         nvme_ctrlr->resetting = true;
    2598          32 :         nvme_ctrlr->dont_retry = true;
    2599             : 
    2600          32 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2601           1 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "Reconnect is already scheduled.\n");
    2602           1 :                 msg_fn = bdev_nvme_reconnect_ctrlr_now;
    2603           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2604           1 :         } else {
    2605          31 :                 msg_fn = _bdev_nvme_reset_ctrlr;
    2606          31 :                 assert(nvme_ctrlr->reset_start_tsc == 0);
    2607             :         }
    2608             : 
    2609          32 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2610             : 
    2611          32 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2612             : 
    2613          32 :         spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2614          32 :         return 0;
    2615          49 : }
    2616             : 
    /* Request re-enabling of a disabled ctrlr.
     *
     * Returns -ENXIO if destruct is set, -EBUSY if resetting is set, -EALREADY if
     * the ctrlr is not disabled, and 0 once the operation has been scheduled.
     *
     * On success, disabled is cleared, resetting is set and reset_start_tsc is
     * recorded under the ctrlr mutex. bdev_nvme_reconnect_ctrlr_now() is then sent
     * to nvme_ctrlr->thread as a message.
     */
    2617             : static int
    2618           3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2619             : {
    2620           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2621           3 :         if (nvme_ctrlr->destruct) {
    2622           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2623           0 :                 return -ENXIO;
    2624             :         }
    2625             : 
    2626           3 :         if (nvme_ctrlr->resetting) {
    2627           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2628           0 :                 return -EBUSY;
    2629             :         }
    2630             : 
    2631           3 :         if (!nvme_ctrlr->disabled) {
    2632           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2633           1 :                 return -EALREADY;
    2634             :         }
    2635             : 
    2636           2 :         nvme_ctrlr->disabled = false;
    2637           2 :         nvme_ctrlr->resetting = true;
    2638             : 
    2639           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2640             : 
    2641           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2642             : 
    2643           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
    2644           2 :         return 0;
    2645           3 : }
    2646             : 
    /* Final step of disabling a ctrlr. It is passed as the completion callback of
     * the per-channel pass started by bdev_nvme_disable_ctrlr_complete().
     * ctx and status are unused.
     *
     * Detaches the saved ctrlr_op callback. Then, under the ctrlr mutex, it clears
     * resetting and dont_retry, picks the follow-up operation with
     * bdev_nvme_check_op_after_reset(), sets disabled and pauses
     * adminq_timer_poller. The callback, if any, is always invoked with 0.
     * OP_COMPLETE_PENDING_DESTRUCT is the only follow-up acted on here.
     *
     * Must run on nvme_ctrlr->thread (asserted).
     */
    2647             : static void
    2648           2 : _bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2649             : {
    2650           2 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2651           2 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2652             :         enum bdev_nvme_op_after_reset op_after_disable;
    2653             : 
    2654           2 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2655             : 
    2656           2 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2657           2 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2658             : 
    2659           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2660             : 
    2661           2 :         nvme_ctrlr->resetting = false;
    2662           2 :         nvme_ctrlr->dont_retry = false;
    2663             : 
    2664           2 :         op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
    2665             : 
    2666           2 :         nvme_ctrlr->disabled = true;
    2667           2 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2668             : 
    2669           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2670             : 
    2671           2 :         if (ctrlr_op_cb_fn) {
    2672           0 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
    2673           0 :         }
    2674             : 
    2675           2 :         switch (op_after_disable) {
    2676             :         case OP_COMPLETE_PENDING_DESTRUCT:
    2677           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2678           0 :                 break;
    2679             :         default:
    2680           2 :                 break;
    2681             :         }
    2682             : 
    2683           2 : }
    2684             : 
    /* Run bdev_nvme_complete_pending_resets() on every ctrlr channel with a NULL
     * ctx, with _bdev_nvme_disable_ctrlr_complete() as the completion callback.
     */
    2685             : static void
    2686           2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
    2687             : {
    2688             :         /* Make sure we clear any pending resets before returning. */
    2689           2 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2690             :                                     bdev_nvme_complete_pending_resets,
    2691             :                                     NULL,
    2692             :                                     _bdev_nvme_disable_ctrlr_complete);
    2693           2 : }
    2694             : 
    /* Completion callback of the qpair teardown pass of a disable operation.
     * status is asserted to be 0 and ctx is unused.
     *
     * A non-fabrics ctrlr completes the disable directly. A fabrics ctrlr is
     * handed to nvme_ctrlr_disconnect() with bdev_nvme_disable_ctrlr_complete() as
     * the follow-up.
     */
    2695             : static void
    2696           1 : bdev_nvme_disable_destroy_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2697             : {
    2698           1 :         assert(status == 0);
    2699             : 
    2700           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2701           0 :                 bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2702           0 :         } else {
    2703           1 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
    2704             :         }
    2705           1 : }
    2706             : 
    /* Run bdev_nvme_reset_destroy_qpair() on every ctrlr channel, with
     * bdev_nvme_disable_destroy_qpairs_done() as the completion callback.
     */
    2707             : static void
    2708           1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2709             : {
    2710           1 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2711             :                                     bdev_nvme_reset_destroy_qpair,
    2712             :                                     NULL,
    2713             :                                     bdev_nvme_disable_destroy_qpairs_done);
    2714           1 : }
    2715             : 
    /* Message handler (ctx is the nvme_ctrlr) chosen by bdev_nvme_disable_ctrlr()
     * when reconnect_is_delayed was set. It unregisters reconnect_delay_timer and
     * goes straight to bdev_nvme_disable_ctrlr_complete().
     * Requires resetting == true and the ctrlr thread (asserted).
     */
    2716             : static void
    2717           1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
    2718             : {
    2719           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2720             : 
    2721           1 :         assert(nvme_ctrlr->resetting == true);
    2722           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2723             : 
    2724           1 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2725             : 
    2726           1 :         bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2727           1 : }
    2728             : 
    /* Message handler (ctx is the nvme_ctrlr) chosen by bdev_nvme_disable_ctrlr()
     * when no delayed reconnect was pending.
     * Requires resetting == true and the ctrlr thread (asserted).
     *
     * A non-fabrics ctrlr is handed to nvme_ctrlr_disconnect() with
     * bdev_nvme_disable_destroy_qpairs() as the follow-up. A fabrics ctrlr has its
     * qpairs destroyed directly.
     */
    2729             : static void
    2730           1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
    2731             : {
    2732           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2733             : 
    2734           1 :         assert(nvme_ctrlr->resetting == true);
    2735           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2736             : 
    2737           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2738           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
    2739           0 :         } else {
    2740           1 :                 bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
    2741             :         }
    2742           1 : }
    2743             : 
    /* Request disabling of the ctrlr.
     *
     * Returns -ENXIO if destruct is set, -EBUSY if resetting is set, -EALREADY if
     * the ctrlr is already disabled, and 0 once the operation has been scheduled.
     *
     * On success, resetting and dont_retry are set and reset_start_tsc is recorded
     * under the ctrlr mutex. The work is sent to nvme_ctrlr->thread as a message:
     * _bdev_nvme_cancel_reconnect_and_disable_ctrlr() if reconnect_is_delayed was
     * set (the flag is cleared here), and
     * _bdev_nvme_disconnect_and_disable_ctrlr() otherwise.
     */
    2744             : static int
    2745           5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2746             : {
    2747             :         spdk_msg_fn msg_fn;
    2748             : 
    2749           5 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2750           5 :         if (nvme_ctrlr->destruct) {
    2751           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2752           1 :                 return -ENXIO;
    2753             :         }
    2754             : 
    2755           4 :         if (nvme_ctrlr->resetting) {
    2756           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2757           1 :                 return -EBUSY;
    2758             :         }
    2759             : 
    2760           3 :         if (nvme_ctrlr->disabled) {
    2761           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2762           1 :                 return -EALREADY;
    2763             :         }
    2764             : 
    2765           2 :         nvme_ctrlr->resetting = true;
    2766           2 :         nvme_ctrlr->dont_retry = true;
    2767             : 
    2768           2 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2769           1 :                 msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
    2770           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2771           1 :         } else {
    2772           1 :                 msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
    2773             :         }
    2774             : 
    2775           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2776             : 
    2777           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2778             : 
    2779           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2780           2 :         return 0;
    2781           5 : }
    2782             : 
    /* Dispatch a ctrlr operation (reset, enable or disable) and remember its
     * completion callback.
     *
     * Returns the result of the selected bdev_nvme_*_ctrlr() function, or -EINVAL
     * for an unrecognized op. cb_fn and cb_arg are stored on nvme_ctrlr only when
     * the result is 0; both fields are asserted to be NULL at that point.
     *
     * NOTE(review): the callback is stored after the operation has already been
     * sent to nvme_ctrlr->thread as a message. Presumably callers run on that same
     * thread, so the message cannot be handled before the store - confirm.
     */
    2783             : static int
    2784          31 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2785             :               bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2786             : {
    2787             :         int rc;
    2788             : 
    2789          31 :         switch (op) {
    2790             :         case NVME_CTRLR_OP_RESET:
    2791          30 :                 rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
    2792          30 :                 break;
    2793             :         case NVME_CTRLR_OP_ENABLE:
    2794           0 :                 rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
    2795           0 :                 break;
    2796             :         case NVME_CTRLR_OP_DISABLE:
    2797           0 :                 rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
    2798           0 :                 break;
    2799             :         default:
    2800           1 :                 rc = -EINVAL;
    2801           1 :                 break;
    2802             :         }
    2803             : 
    2804          31 :         if (rc == 0) {
    2805          16 :                 assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
    2806          16 :                 assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
    2807          16 :                 nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
    2808          16 :                 nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
    2809          16 :         }
    2810          31 :         return rc;
    2811             : }
    2812             : 
    2813             : struct nvme_ctrlr_op_rpc_ctx {
    2814             :         struct nvme_ctrlr *nvme_ctrlr;
    2815             :         struct spdk_thread *orig_thread;
    2816             :         enum nvme_ctrlr_op op;
    2817             :         int rc;
    2818             :         bdev_nvme_ctrlr_op_cb cb_fn;
    2819             :         void *cb_arg;
    2820             : };
    2821             : 
    2822             : static void
    2823           4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
    2824             : {
    2825           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2826             : 
    2827           4 :         assert(ctx != NULL);
    2828           4 :         assert(ctx->cb_fn != NULL);
    2829             : 
    2830           4 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2831             : 
    2832           4 :         free(ctx);
    2833           4 : }
    2834             : 
    2835             : static void
    2836           4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
    2837             : {
    2838           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2839             : 
    2840           4 :         ctx->rc = rc;
    2841             : 
    2842           4 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
    2843           4 : }
    2844             : 
    2845             : void
    2846           4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2847             :                   bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2848             : {
    2849             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2850             :         int rc;
    2851             : 
    2852           4 :         assert(cb_fn != NULL);
    2853             : 
    2854           4 :         ctx = calloc(1, sizeof(*ctx));
    2855           4 :         if (ctx == NULL) {
    2856           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2857           0 :                 cb_fn(cb_arg, -ENOMEM);
    2858           0 :                 return;
    2859             :         }
    2860             : 
    2861           4 :         ctx->orig_thread = spdk_get_thread();
    2862           4 :         ctx->cb_fn = cb_fn;
    2863           4 :         ctx->cb_arg = cb_arg;
    2864             : 
    2865           4 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
    2866           4 :         if (rc == 0) {
    2867           1 :                 return;
    2868           3 :         } else if (rc == -EALREADY) {
    2869           0 :                 rc = 0;
    2870           0 :         }
    2871             : 
    2872           3 :         nvme_ctrlr_op_rpc_complete(ctx, rc);
    2873           4 : }
    2874             : 
    2875             : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
    2876             : 
    2877             : static void
    2878           2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
    2879             : {
    2880           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2881             :         struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
    2882             :         int rc;
    2883             : 
    2884           2 :         prev_nvme_ctrlr = ctx->nvme_ctrlr;
    2885           2 :         ctx->nvme_ctrlr = NULL;
    2886             : 
    2887           2 :         if (ctx->rc != 0) {
    2888           0 :                 goto complete;
    2889             :         }
    2890             : 
    2891           2 :         next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
    2892           2 :         if (next_nvme_ctrlr == NULL) {
    2893           1 :                 goto complete;
    2894             :         }
    2895             : 
    2896           1 :         rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2897           1 :         if (rc == 0) {
    2898           1 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2899           1 :                 return;
    2900           0 :         } else if (rc == -EALREADY) {
    2901           0 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2902           0 :                 rc = 0;
    2903           0 :         }
    2904             : 
    2905           0 :         ctx->rc = rc;
    2906             : 
    2907             : complete:
    2908           1 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2909           1 :         free(ctx);
    2910           2 : }
    2911             : 
    2912             : static void
    2913           2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
    2914             : {
    2915           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2916             : 
    2917           2 :         ctx->rc = rc;
    2918             : 
    2919           2 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2920           2 : }
    2921             : 
    2922             : void
    2923           1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
    2924             :                        bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2925             : {
    2926             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2927             :         struct nvme_ctrlr *nvme_ctrlr;
    2928             :         int rc;
    2929             : 
    2930           1 :         assert(cb_fn != NULL);
    2931             : 
    2932           1 :         ctx = calloc(1, sizeof(*ctx));
    2933           1 :         if (ctx == NULL) {
    2934           0 :                 SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2935           0 :                 cb_fn(cb_arg, -ENOMEM);
    2936           0 :                 return;
    2937             :         }
    2938             : 
    2939           1 :         ctx->orig_thread = spdk_get_thread();
    2940           1 :         ctx->op = op;
    2941           1 :         ctx->cb_fn = cb_fn;
    2942           1 :         ctx->cb_arg = cb_arg;
    2943             : 
    2944           1 :         nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
    2945           1 :         assert(nvme_ctrlr != NULL);
    2946             : 
    2947           1 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2948           1 :         if (rc == 0) {
    2949           1 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2950           1 :                 return;
    2951           0 :         } else if (rc == -EALREADY) {
    2952           0 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2953           0 :                 rc = 0;
    2954           0 :         }
    2955             : 
    2956           0 :         nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
    2957           1 : }
    2958             : 
    2959             : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
    2960             : 
    2961             : static void
    2962          15 : bdev_nvme_unfreeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
    2963             : {
    2964          15 :         struct nvme_bdev_io *bio = ctx;
    2965             :         enum spdk_bdev_io_status io_status;
    2966             : 
    2967          15 :         if (bio->cpl.cdw0 == 0) {
    2968          11 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    2969          11 :         } else {
    2970           4 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    2971             :         }
    2972             : 
    2973          15 :         NVME_BDEV_INFOLOG(nbdev, "reset_io %p completed, status:%d\n", bio, io_status);
    2974             : 
    2975          15 :         __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
    2976          15 : }
    2977             : 
    2978             : static void
    2979          30 : bdev_nvme_unfreeze_bdev_channel(struct nvme_bdev_channel_iter *i,
    2980             :                                 struct nvme_bdev *nbdev,
    2981             :                                 struct nvme_bdev_channel *nbdev_ch, void *ctx)
    2982             : {
    2983          30 :         bdev_nvme_abort_retry_ios(nbdev_ch);
    2984          30 :         nbdev_ch->resetting = false;
    2985             : 
    2986          30 :         nvme_bdev_for_each_channel_continue(i, 0);
    2987          30 : }
    2988             : 
    2989             : static void
    2990          15 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
    2991             : {
    2992          15 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    2993          15 :         struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
    2994             : 
    2995             :         /* Abort all queued I/Os for retry. */
    2996          30 :         nvme_bdev_for_each_channel(nbdev,
    2997             :                                    bdev_nvme_unfreeze_bdev_channel,
    2998          15 :                                    bio,
    2999             :                                    bdev_nvme_unfreeze_bdev_channel_done);
    3000          15 : }
    3001             : 
    3002             : static void
    3003          25 : _bdev_nvme_reset_io_continue(void *ctx)
    3004             : {
    3005          25 :         struct nvme_bdev_io *bio = ctx;
    3006             :         struct nvme_io_path *prev_io_path, *next_io_path;
    3007             :         int rc;
    3008             : 
    3009          25 :         prev_io_path = bio->io_path;
    3010          25 :         bio->io_path = NULL;
    3011             : 
    3012          25 :         next_io_path = STAILQ_NEXT(prev_io_path, stailq);
    3013          25 :         if (next_io_path == NULL) {
    3014          15 :                 goto complete;
    3015             :         }
    3016             : 
    3017          10 :         rc = _bdev_nvme_reset_io(next_io_path, bio);
    3018          10 :         if (rc == 0) {
    3019          10 :                 return;
    3020             :         }
    3021             : 
    3022             : complete:
    3023          15 :         bdev_nvme_reset_io_complete(bio);
    3024          25 : }
    3025             : 
    3026             : static void
    3027          25 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
    3028             : {
    3029          25 :         struct nvme_bdev_io *bio = cb_arg;
    3030          25 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    3031          25 :         struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
    3032             : 
    3033          25 :         NVME_BDEV_INFOLOG(nbdev, "continue reset_io %p, rc:%d\n", bio, rc);
    3034             : 
    3035             :         /* Reset status is initialized as "failed". Set to "success" once we have at least one
    3036             :          * successfully reset nvme_ctrlr.
    3037             :          */
    3038          25 :         if (rc == 0) {
    3039          15 :                 bio->cpl.cdw0 = 0;
    3040          15 :         }
    3041             : 
    3042          25 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
    3043          25 : }
    3044             : 
    3045             : static int
    3046          25 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
    3047             : {
    3048          25 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    3049          25 :         struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
    3050          25 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    3051             :         struct nvme_ctrlr_channel *ctrlr_ch;
    3052             :         int rc;
    3053             : 
    3054          25 :         assert(bio->io_path == NULL);
    3055          25 :         bio->io_path = io_path;
    3056             : 
    3057          50 :         rc = nvme_ctrlr_op(nvme_ctrlr, NVME_CTRLR_OP_RESET,
    3058          25 :                            bdev_nvme_reset_io_continue, bio);
    3059             : 
    3060          25 :         if (rc == 0) {
    3061          13 :                 NVME_BDEV_INFOLOG(nbdev, "reset_io %p started resetting ctrlr [%s, %u].\n",
    3062             :                                   bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr));
    3063          25 :         } else if (rc == -EBUSY) {
    3064          11 :                 ctrlr_ch = io_path->qpair->ctrlr_ch;
    3065          11 :                 assert(ctrlr_ch != NULL);
    3066             :                 /*
    3067             :                  * Reset call is queued only if it is from the app framework. This is on purpose so that
    3068             :                  * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
    3069             :                  * upper level. If they are in the middle of a reset, we won't try to schedule another one.
    3070             :                  */
    3071          11 :                 TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bio, retry_link);
    3072             : 
    3073          11 :                 rc = 0;
    3074             : 
    3075          11 :                 NVME_BDEV_INFOLOG(nbdev, "reset_io %p was queued to ctrlr [%s, %u].\n",
    3076             :                                   bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr));
    3077          11 :         } else {
    3078           1 :                 NVME_BDEV_INFOLOG(nbdev, "reset_io %p could not reset ctrlr [%s, %u], rc:%d\n",
    3079             :                                   bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr), rc);
    3080             :         }
    3081             : 
    3082          25 :         return rc;
    3083             : }
    3084             : 
    3085             : static void
    3086          15 : bdev_nvme_freeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
    3087             : {
    3088          15 :         struct nvme_bdev_io *bio = ctx;
    3089          15 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    3090             :         struct nvme_bdev_channel *nbdev_ch;
    3091             :         struct nvme_io_path *io_path;
    3092             :         int rc;
    3093             : 
    3094          15 :         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    3095             : 
    3096             :         /* Initialize with failed status. With multipath it is enough to have at least one successful
    3097             :          * nvme_ctrlr reset. If there is none, reset status will remain failed.
    3098             :          */
    3099          15 :         bio->cpl.cdw0 = 1;
    3100             : 
    3101             :         /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
    3102          15 :         io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
    3103          15 :         assert(io_path != NULL);
    3104             : 
    3105          15 :         rc = _bdev_nvme_reset_io(io_path, bio);
    3106          15 :         if (rc != 0) {
    3107             :                 /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
    3108           1 :                 rc = (rc == -EALREADY) ? 0 : rc;
    3109             : 
    3110           1 :                 bdev_nvme_reset_io_continue(bio, rc);
    3111           1 :         }
    3112          15 : }
    3113             : 
    3114             : static void
    3115          30 : bdev_nvme_freeze_bdev_channel(struct nvme_bdev_channel_iter *i,
    3116             :                               struct nvme_bdev *nbdev,
    3117             :                               struct nvme_bdev_channel *nbdev_ch, void *ctx)
    3118             : {
    3119          30 :         nbdev_ch->resetting = true;
    3120             : 
    3121          30 :         nvme_bdev_for_each_channel_continue(i, 0);
    3122          30 : }
    3123             : 
    3124             : static void
    3125          15 : bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio)
    3126             : {
    3127          15 :         NVME_BDEV_INFOLOG(nbdev, "reset_io %p started.\n", bio);
    3128             : 
    3129          30 :         nvme_bdev_for_each_channel(nbdev,
    3130             :                                    bdev_nvme_freeze_bdev_channel,
    3131          15 :                                    bio,
    3132             :                                    bdev_nvme_freeze_bdev_channel_done);
    3133          15 : }
    3134             : 
    3135             : static int
    3136          31 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
    3137             : {
    3138          31 :         if (nvme_ctrlr->destruct) {
    3139             :                 /* Don't bother resetting if the controller is in the process of being destructed. */
    3140           2 :                 return -ENXIO;
    3141             :         }
    3142             : 
    3143          29 :         if (nvme_ctrlr->resetting) {
    3144           3 :                 if (!nvme_ctrlr->in_failover) {
    3145           3 :                         NVME_CTRLR_NOTICELOG(nvme_ctrlr,
    3146             :                                              "Reset is already in progress. Defer failover until reset completes.\n");
    3147             : 
    3148             :                         /* Defer failover until reset completes. */
    3149           3 :                         nvme_ctrlr->pending_failover = true;
    3150           3 :                         return -EINPROGRESS;
    3151             :                 } else {
    3152           0 :                         NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform failover, already in progress.\n");
    3153           0 :                         return -EBUSY;
    3154             :                 }
    3155             :         }
    3156             : 
    3157          26 :         bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
    3158             : 
    3159          26 :         if (nvme_ctrlr->reconnect_is_delayed) {
    3160           1 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Reconnect is already scheduled.\n");
    3161             : 
    3162             :                 /* We rely on the next reconnect for the failover. */
    3163           1 :                 return -EALREADY;
    3164             :         }
    3165             : 
    3166          25 :         if (nvme_ctrlr->disabled) {
    3167           0 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Controller is disabled.\n");
    3168             : 
    3169             :                 /* We rely on the enablement for the failover. */
    3170           0 :                 return -EALREADY;
    3171             :         }
    3172             : 
    3173          25 :         nvme_ctrlr->resetting = true;
    3174          25 :         nvme_ctrlr->in_failover = true;
    3175             : 
    3176          25 :         assert(nvme_ctrlr->reset_start_tsc == 0);
    3177          25 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    3178             : 
    3179          25 :         return 0;
    3180          31 : }
    3181             : 
    3182             : static int
    3183          29 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    3184             : {
    3185             :         int rc;
    3186             : 
    3187          29 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    3188          29 :         rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
    3189          29 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    3190             : 
    3191          29 :         if (rc == 0) {
    3192          24 :                 spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
    3193          29 :         } else if (rc == -EALREADY) {
    3194           0 :                 rc = 0;
    3195           0 :         }
    3196             : 
    3197          29 :         return rc;
    3198             : }
    3199             : 
    3200             : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    3201             :                            uint64_t num_blocks);
    3202             : 
    3203             : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    3204             :                                   uint64_t num_blocks);
    3205             : 
    3206             : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
    3207             :                           uint64_t src_offset_blocks,
    3208             :                           uint64_t num_blocks);
    3209             : 
    3210             : static void
    3211           1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
    3212             :                      bool success)
    3213             : {
    3214           1 :         struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3215             :         int ret;
    3216             : 
    3217           1 :         if (!success) {
    3218           0 :                 ret = -EINVAL;
    3219           0 :                 goto exit;
    3220             :         }
    3221             : 
    3222           1 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    3223           0 :                 ret = -ENXIO;
    3224           0 :                 goto exit;
    3225             :         }
    3226             : 
    3227           2 :         ret = bdev_nvme_readv(bio,
    3228           1 :                               bdev_io->u.bdev.iovs,
    3229           1 :                               bdev_io->u.bdev.iovcnt,
    3230           1 :                               bdev_io->u.bdev.md_buf,
    3231           1 :                               bdev_io->u.bdev.num_blocks,
    3232           1 :                               bdev_io->u.bdev.offset_blocks,
    3233           1 :                               bdev_io->u.bdev.dif_check_flags,
    3234           1 :                               bdev_io->u.bdev.memory_domain,
    3235           1 :                               bdev_io->u.bdev.memory_domain_ctx,
    3236           1 :                               bdev_io->u.bdev.accel_sequence);
    3237             : 
    3238             : exit:
    3239           1 :         if (spdk_unlikely(ret != 0)) {
    3240           0 :                 bdev_nvme_io_complete(bio, ret);
    3241           0 :         }
    3242           1 : }
    3243             : 
    3244             : static inline void
    3245          59 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    3246             : {
    3247          59 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3248          59 :         struct spdk_bdev *bdev = bdev_io->bdev;
    3249             :         struct nvme_bdev_io *nbdev_io_to_abort;
    3250          59 :         int rc = 0;
    3251             : 
    3252          59 :         switch (bdev_io->type) {
    3253             :         case SPDK_BDEV_IO_TYPE_READ:
    3254           3 :                 if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
    3255             : 
    3256           4 :                         rc = bdev_nvme_readv(nbdev_io,
    3257           2 :                                              bdev_io->u.bdev.iovs,
    3258           2 :                                              bdev_io->u.bdev.iovcnt,
    3259           2 :                                              bdev_io->u.bdev.md_buf,
    3260           2 :                                              bdev_io->u.bdev.num_blocks,
    3261           2 :                                              bdev_io->u.bdev.offset_blocks,
    3262           2 :                                              bdev_io->u.bdev.dif_check_flags,
    3263           2 :                                              bdev_io->u.bdev.memory_domain,
    3264           2 :                                              bdev_io->u.bdev.memory_domain_ctx,
    3265           2 :                                              bdev_io->u.bdev.accel_sequence);
    3266           2 :                 } else {
    3267           2 :                         spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
    3268           1 :                                              bdev_io->u.bdev.num_blocks * bdev->blocklen);
    3269           1 :                         rc = 0;
    3270             :                 }
    3271           3 :                 break;
    3272             :         case SPDK_BDEV_IO_TYPE_WRITE:
    3273          50 :                 rc = bdev_nvme_writev(nbdev_io,
    3274          25 :                                       bdev_io->u.bdev.iovs,
    3275          25 :                                       bdev_io->u.bdev.iovcnt,
    3276          25 :                                       bdev_io->u.bdev.md_buf,
    3277          25 :                                       bdev_io->u.bdev.num_blocks,
    3278          25 :                                       bdev_io->u.bdev.offset_blocks,
    3279          25 :                                       bdev_io->u.bdev.dif_check_flags,
    3280          25 :                                       bdev_io->u.bdev.memory_domain,
    3281          25 :                                       bdev_io->u.bdev.memory_domain_ctx,
    3282          25 :                                       bdev_io->u.bdev.accel_sequence,
    3283          25 :                                       bdev_io->u.bdev.nvme_cdw12,
    3284          25 :                                       bdev_io->u.bdev.nvme_cdw13);
    3285          25 :                 break;
    3286             :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3287           2 :                 rc = bdev_nvme_comparev(nbdev_io,
    3288           1 :                                         bdev_io->u.bdev.iovs,
    3289           1 :                                         bdev_io->u.bdev.iovcnt,
    3290           1 :                                         bdev_io->u.bdev.md_buf,
    3291           1 :                                         bdev_io->u.bdev.num_blocks,
    3292           1 :                                         bdev_io->u.bdev.offset_blocks,
    3293           1 :                                         bdev_io->u.bdev.dif_check_flags);
    3294           1 :                 break;
    3295             :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3296           4 :                 rc = bdev_nvme_comparev_and_writev(nbdev_io,
    3297           2 :                                                    bdev_io->u.bdev.iovs,
    3298           2 :                                                    bdev_io->u.bdev.iovcnt,
    3299           2 :                                                    bdev_io->u.bdev.fused_iovs,
    3300           2 :                                                    bdev_io->u.bdev.fused_iovcnt,
    3301           2 :                                                    bdev_io->u.bdev.md_buf,
    3302           2 :                                                    bdev_io->u.bdev.num_blocks,
    3303           2 :                                                    bdev_io->u.bdev.offset_blocks,
    3304           2 :                                                    bdev_io->u.bdev.dif_check_flags);
    3305           2 :                 break;
    3306             :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3307           2 :                 rc = bdev_nvme_unmap(nbdev_io,
    3308           1 :                                      bdev_io->u.bdev.offset_blocks,
    3309           1 :                                      bdev_io->u.bdev.num_blocks);
    3310           1 :                 break;
    3311             :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3312           0 :                 rc =  bdev_nvme_write_zeroes(nbdev_io,
    3313           0 :                                              bdev_io->u.bdev.offset_blocks,
    3314           0 :                                              bdev_io->u.bdev.num_blocks);
    3315           0 :                 break;
    3316             :         case SPDK_BDEV_IO_TYPE_RESET:
    3317          15 :                 nbdev_io->io_path = NULL;
    3318          15 :                 bdev_nvme_reset_io(bdev->ctxt, nbdev_io);
    3319          15 :                 return;
    3320             : 
    3321             :         case SPDK_BDEV_IO_TYPE_FLUSH:
    3322           1 :                 bdev_nvme_io_complete(nbdev_io, 0);
    3323           1 :                 return;
    3324             : 
    3325             :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3326           0 :                 rc = bdev_nvme_zone_appendv(nbdev_io,
    3327           0 :                                             bdev_io->u.bdev.iovs,
    3328           0 :                                             bdev_io->u.bdev.iovcnt,
    3329           0 :                                             bdev_io->u.bdev.md_buf,
    3330           0 :                                             bdev_io->u.bdev.num_blocks,
    3331           0 :                                             bdev_io->u.bdev.offset_blocks,
    3332           0 :                                             bdev_io->u.bdev.dif_check_flags);
    3333           0 :                 break;
    3334             :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3335           0 :                 rc = bdev_nvme_get_zone_info(nbdev_io,
    3336           0 :                                              bdev_io->u.zone_mgmt.zone_id,
    3337           0 :                                              bdev_io->u.zone_mgmt.num_zones,
    3338           0 :                                              bdev_io->u.zone_mgmt.buf);
    3339           0 :                 break;
    3340             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3341           0 :                 rc = bdev_nvme_zone_management(nbdev_io,
    3342           0 :                                                bdev_io->u.zone_mgmt.zone_id,
    3343           0 :                                                bdev_io->u.zone_mgmt.zone_action);
    3344           0 :                 break;
    3345             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3346           5 :                 nbdev_io->io_path = NULL;
    3347          10 :                 bdev_nvme_admin_passthru(nbdev_ch,
    3348           5 :                                          nbdev_io,
    3349           5 :                                          &bdev_io->u.nvme_passthru.cmd,
    3350           5 :                                          bdev_io->u.nvme_passthru.buf,
    3351           5 :                                          bdev_io->u.nvme_passthru.nbytes);
    3352           5 :                 return;
    3353             : 
    3354             :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3355           0 :                 rc = bdev_nvme_io_passthru(nbdev_io,
    3356           0 :                                            &bdev_io->u.nvme_passthru.cmd,
    3357           0 :                                            bdev_io->u.nvme_passthru.buf,
    3358           0 :                                            bdev_io->u.nvme_passthru.nbytes);
    3359           0 :                 break;
    3360             :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3361           0 :                 rc = bdev_nvme_io_passthru_md(nbdev_io,
    3362           0 :                                               &bdev_io->u.nvme_passthru.cmd,
    3363           0 :                                               bdev_io->u.nvme_passthru.buf,
    3364           0 :                                               bdev_io->u.nvme_passthru.nbytes,
    3365           0 :                                               bdev_io->u.nvme_passthru.md_buf,
    3366           0 :                                               bdev_io->u.nvme_passthru.md_len);
    3367           0 :                 break;
    3368             :         case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
    3369           0 :                 rc = bdev_nvme_iov_passthru_md(nbdev_io,
    3370           0 :                                                &bdev_io->u.nvme_passthru.cmd,
    3371           0 :                                                bdev_io->u.nvme_passthru.iovs,
    3372           0 :                                                bdev_io->u.nvme_passthru.iovcnt,
    3373           0 :                                                bdev_io->u.nvme_passthru.nbytes,
    3374           0 :                                                bdev_io->u.nvme_passthru.md_buf,
    3375           0 :                                                bdev_io->u.nvme_passthru.md_len);
    3376           0 :                 break;
    3377             :         case SPDK_BDEV_IO_TYPE_ABORT:
    3378           6 :                 nbdev_io->io_path = NULL;
    3379           6 :                 nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
    3380          12 :                 bdev_nvme_abort(nbdev_ch,
    3381           6 :                                 nbdev_io,
    3382           6 :                                 nbdev_io_to_abort);
    3383           6 :                 return;
    3384             : 
    3385             :         case SPDK_BDEV_IO_TYPE_COPY:
    3386           0 :                 rc = bdev_nvme_copy(nbdev_io,
    3387           0 :                                     bdev_io->u.bdev.offset_blocks,
    3388           0 :                                     bdev_io->u.bdev.copy.src_offset_blocks,
    3389           0 :                                     bdev_io->u.bdev.num_blocks);
    3390           0 :                 break;
    3391             :         default:
    3392           0 :                 rc = -EINVAL;
    3393           0 :                 break;
    3394             :         }
    3395             : 
    3396          32 :         if (spdk_unlikely(rc != 0)) {
    3397           0 :                 bdev_nvme_io_complete(nbdev_io, rc);
    3398           0 :         }
    3399          59 : }
    3400             : 
                       /* Submit one bdev I/O on the given channel.
                        *
                        * The function stamps nbdev_io->submit_tsc, records a
                        * TRACE_BDEV_NVME_IO_START trace point, looks up an io_path for the
                        * channel and then hands the request to _bdev_nvme_submit_request().
                        *
                        * If no io_path is found and bdev_nvme_io_type_is_admin() reports that
                        * the I/O type is not an admin type, the I/O is completed right here
                        * with -ENXIO and is not submitted.
                        *
                        * \param ch I/O channel whose context is a struct nvme_bdev_channel.
                        * \param bdev_io I/O to submit; its driver_ctx is used as struct nvme_bdev_io.
                        */
    3401             : static void
    3402          68 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
    3403             : {
    3404          68 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    3405          68 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3406             : 
                               /* A zero submit_tsc is treated as the first submission of this I/O,
                                * so the timestamp already taken by the bdev layer is reused.
                                */
    3407          68 :         if (spdk_likely(nbdev_io->submit_tsc == 0)) {
    3408          68 :                 nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
    3409          68 :         } else {
    3410             :                 /* There are cases where submit_tsc != 0, i.e. retry I/O.
    3411             :                  * We need to update submit_tsc here.
    3412             :                  */
    3413           0 :                 nbdev_io->submit_tsc = spdk_get_ticks();
    3414             :         }
    3415             : 
    3416          68 :         spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
    3417          68 :         nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
    3418          68 :         if (spdk_unlikely(!nbdev_io->io_path)) {
    3419          13 :                 if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
                                               /* No usable path for a non-admin I/O: fail it with -ENXIO. */
    3420          12 :                         bdev_nvme_io_complete(nbdev_io, -ENXIO);
    3421          12 :                         return;
    3422             :                 }
    3423             : 
    3424             :                 /* Admin commands do not use the optimal I/O path.
    3425             :                  * Simply fall through even if it is not found.
    3426             :                  */
    3427           1 :         }
    3428             : 
    3429          56 :         _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    3430          68 : }
    3431             : 
                       /* Tell whether a command set identifier is one this module handles fully.
                        *
                        * \param csi Command set identifier of a namespace.
                        * \return true for SPDK_NVME_CSI_NVM and SPDK_NVME_CSI_ZNS, false for
                        * every other value.
                        */
    3432             : static bool
    3433           0 : bdev_nvme_is_supported_csi(enum spdk_nvme_csi csi)
    3434             : {
    3435           0 :         switch (csi) {
    3436             :         case SPDK_NVME_CSI_NVM:
    3437           0 :                 return true;
    3438             :         case SPDK_NVME_CSI_ZNS:
    3439           0 :                 return true;
    3440             :         default:
    3441           0 :                 return false;
    3442             :         }
    3443           0 : }
    3444             : 
                       /* Report whether the bdev passed as ctx supports the given I/O type.
                        *
                        * Only the first entry of nbdev->nvme_ns_list is consulted. If that entry
                        * has no namespace attached (ns == NULL) every I/O type is reported as
                        * unsupported.
                        *
                        * \param ctx Pointer to the struct nvme_bdev being queried.
                        * \param io_type bdev I/O type to check.
                        * \return true if the I/O type is supported, false otherwise.
                        *
                        * NOTE(review): SPDK_BDEV_IO_TYPE_NVME_IOV_MD is not listed in either
                        * switch below and therefore reports false, while the request dispatch
                        * switch earlier in this file has a case for it. Confirm this is intended.
                        */
    3445             : static bool
    3446           0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
    3447             : {
    3448           0 :         struct nvme_bdev *nbdev = ctx;
    3449             :         struct nvme_ns *nvme_ns;
    3450             :         struct spdk_nvme_ns *ns;
    3451             :         struct spdk_nvme_ctrlr *ctrlr;
    3452             :         const struct spdk_nvme_ctrlr_data *cdata;
    3453             : 
    3454           0 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    3455           0 :         assert(nvme_ns != NULL);
    3456           0 :         ns = nvme_ns->ns;
    3457           0 :         if (ns == NULL) {
    3458           0 :                 return false;
    3459             :         }
    3460             : 
                               /* For a command set other than NVM or ZNS only the passthru types
                                * are offered; the metadata variant additionally requires a
                                * non-zero metadata size.
                                */
    3461           0 :         if (!bdev_nvme_is_supported_csi(spdk_nvme_ns_get_csi(ns))) {
    3462           0 :                 switch (io_type) {
    3463             :                 case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3464             :                 case SPDK_BDEV_IO_TYPE_NVME_IO:
    3465           0 :                         return true;
    3466             : 
    3467             :                 case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3468           0 :                         return spdk_nvme_ns_get_md_size(ns) ? true : false;
    3469             : 
    3470             :                 default:
    3471           0 :                         return false;
    3472             :                 }
    3473             :         }
    3474             : 
    3475           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    3476             : 
    3477           0 :         switch (io_type) {
                               /* Types that are reported as supported unconditionally. */
    3478             :         case SPDK_BDEV_IO_TYPE_READ:
    3479             :         case SPDK_BDEV_IO_TYPE_WRITE:
    3480             :         case SPDK_BDEV_IO_TYPE_RESET:
    3481             :         case SPDK_BDEV_IO_TYPE_FLUSH:
    3482             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3483             :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3484             :         case SPDK_BDEV_IO_TYPE_ABORT:
    3485           0 :                 return true;
    3486             : 
    3487             :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3488           0 :                 return spdk_nvme_ns_supports_compare(ns);
    3489             : 
    3490             :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3491           0 :                 return spdk_nvme_ns_get_md_size(ns) ? true : false;
    3492             : 
                               /* The next two types follow the matching oncs bit of the
                                * controller data.
                                */
    3493             :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3494           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3495           0 :                 return cdata->oncs.dsm;
    3496             : 
    3497             :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3498           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3499           0 :                 return cdata->oncs.write_zeroes;
    3500             : 
    3501             :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3502           0 :                 if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    3503             :                     SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
    3504           0 :                         return true;
    3505             :                 }
    3506           0 :                 return false;
    3507             : 
                               /* Zone operations require a ZNS namespace; zone append also
                                * requires the controller flag.
                                */
    3508             :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3509             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3510           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
    3511             : 
    3512             :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3513           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
    3514           0 :                        spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
    3515             : 
    3516             :         case SPDK_BDEV_IO_TYPE_COPY:
    3517           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3518           0 :                 return cdata->oncs.copy;
    3519             : 
    3520             :         default:
    3521           0 :                 return false;
    3522             :         }
    3523           0 : }
    3524             : 
                       /* Allocate a struct nvme_qpair for a controller channel and register it.
                        *
                        * The new nvme_qpair is bound to nvme_ctrlr and ctrlr_ch, and to the poll
                        * group that is the context of an I/O channel taken on g_nvme_bdev_ctrlrs.
                        * Unless the controller is disabled, bdev_nvme_create_qpair() is called.
                        * On success the nvme_qpair is appended to the qpair_list of the poll
                        * group, stored in ctrlr_ch->qpair, and the ref count of the controller
                        * is incremented under its mutex.
                        *
                        * \param nvme_ctrlr Controller that owns the new qpair.
                        * \param ctrlr_ch Controller channel the qpair is attached to.
                        * \return 0 on success, -1 if the allocation or the poll group channel
                        * lookup fails, or the non-zero return value of bdev_nvme_create_qpair()
                        * when that failure is not tolerated (see the comment in the body).
                        */
    3525             : static int
    3526          59 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
    3527             : {
    3528             :         struct nvme_qpair *nvme_qpair;
    3529             :         struct spdk_io_channel *pg_ch;
    3530             :         int rc;
    3531             : 
    3532          59 :         nvme_qpair = calloc(1, sizeof(*nvme_qpair));
    3533          59 :         if (!nvme_qpair) {
    3534           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to alloc nvme_qpair.\n");
    3535           0 :                 return -1;
    3536             :         }
    3537             : 
    3538          59 :         TAILQ_INIT(&nvme_qpair->io_path_list);
    3539             : 
    3540          59 :         nvme_qpair->ctrlr = nvme_ctrlr;
    3541          59 :         nvme_qpair->ctrlr_ch = ctrlr_ch;
    3542             : 
                               /* The channel context obtained below is used as the poll group.
                                * NOTE(review): this failure path returns -1 without logging,
                                * unlike the allocation failure above.
                                */
    3543          59 :         pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
    3544          59 :         if (!pg_ch) {
    3545           0 :                 free(nvme_qpair);
    3546           0 :                 return -1;
    3547             :         }
    3548             : 
    3549          59 :         nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
    3550             : 
    3551             : #ifdef SPDK_CONFIG_VTUNE
    3552             :         nvme_qpair->group->collect_spin_stat = true;
    3553             : #else
    3554          59 :         nvme_qpair->group->collect_spin_stat = false;
    3555             : #endif
    3556             : 
    3557          59 :         if (!nvme_ctrlr->disabled) {
    3558             :                 /* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will
    3559             :                  * be created when it's enabled.
    3560             :                  */
    3561          59 :                 rc = bdev_nvme_create_qpair(nvme_qpair);
    3562          59 :                 if (rc != 0) {
    3563             :                         /* nvme_ctrlr can't create IO qpair if connection is down.
    3564             :                          * If reconnect_delay_sec is non-zero, creating IO qpair is retried
    3565             :                          * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
    3566             :                          * submitted IO will be queued until IO qpair is successfully created.
    3567             :                          *
    3568             :                          * Hence, if both are satisfied, ignore the failure.
    3569             :                          */
    3570           0 :                         if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
    3571           0 :                                 spdk_put_io_channel(pg_ch);
    3572           0 :                                 free(nvme_qpair);
    3573           0 :                                 return rc;
    3574             :                         }
                                               /* Tolerated failure: execution continues and the
                                                * nvme_qpair is still registered below.
                                                */
    3575           0 :                 }
    3576          59 :         }
    3577             : 
    3578          59 :         TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3579             : 
    3580          59 :         ctrlr_ch->qpair = nvme_qpair;
    3581             : 
                               /* Take a controller reference for this qpair; nvme_qpair_delete()
                                * calls nvme_ctrlr_release() on the same controller.
                                */
    3582          59 :         pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
    3583          59 :         nvme_qpair->ctrlr->ref++;
    3584          59 :         pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);
    3585             : 
    3586          59 :         return 0;
    3587          59 : }
    3588             : 
                       /* Channel create callback for a controller channel.
                        *
                        * Initializes the pending_resets list of the channel and creates the
                        * nvme_qpair for it.
                        *
                        * \param io_device The struct nvme_ctrlr the channel belongs to.
                        * \param ctx_buf Channel context, used as struct nvme_ctrlr_channel.
                        * \return The return value of nvme_qpair_create().
                        */
    3589             : static int
    3590          59 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3591             : {
    3592          59 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
    3593          59 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3594             : 
    3595          59 :         TAILQ_INIT(&ctrlr_ch->pending_resets);
    3596             : 
    3597          59 :         return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
    3598             : }
    3599             : 
                       /* Tear down and free a struct nvme_qpair.
                        *
                        * Every io_path still linked on nvme_qpair->io_path_list is removed and
                        * freed, the nvme_qpair is unlinked from the qpair_list of its poll group,
                        * the I/O channel whose context is that poll group is put, the controller
                        * is released via nvme_ctrlr_release(), and the nvme_qpair itself is freed.
                        *
                        * \param nvme_qpair Object to delete; its group must not be NULL
                        * (asserted). The pointer is invalid after the call.
                        */
    3600             : static void
    3601          59 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
    3602             : {
    3603             :         struct nvme_io_path *io_path, *next;
    3604             : 
    3605          59 :         assert(nvme_qpair->group != NULL);
    3606             : 
                               /* The _SAFE variant is used because entries are removed and freed
                                * while the list is being walked.
                                */
    3607          96 :         TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
    3608          37 :                 TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
    3609          37 :                 nvme_io_path_free(io_path);
    3610          37 :         }
    3611             : 
    3612          59 :         TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3613             : 
    3614          59 :         spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
    3615             : 
    3616          59 :         nvme_ctrlr_release(nvme_qpair->ctrlr);
    3617             : 
    3618          59 :         free(nvme_qpair);
    3619          59 : }
    3620             : 
                       /* Channel destroy callback for a controller channel.
                        *
                        * Clears the io_path cache for the nvme_qpair of the channel, then either
                        * - if the nvme_qpair still has an underlying qpair: disconnects it (or,
                        *   when a reset iteration is pending on this channel, continues that
                        *   iteration instead) and detaches the nvme_qpair from the channel by
                        *   setting its ctrlr_ch to NULL, or
                        * - if there is no underlying qpair: deletes the nvme_qpair immediately.
                        *
                        * \param io_device Unused.
                        * \param ctx_buf Channel context, used as struct nvme_ctrlr_channel.
                        */
    3621             : static void
    3622          59 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3623             : {
    3624          59 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3625             :         struct nvme_qpair *nvme_qpair;
    3626             : 
    3627          59 :         nvme_qpair = ctrlr_ch->qpair;
    3628          59 :         assert(nvme_qpair != NULL);
    3629             : 
    3630          59 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    3631             : 
    3632          59 :         if (nvme_qpair->qpair != NULL) {
    3633          45 :                 if (ctrlr_ch->reset_iter == NULL) {
    3634          45 :                         spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
    3635          45 :                 } else {
    3636             :                         /* Skip current ctrlr_channel in a full reset sequence because
    3637             :                          * it is being deleted now. The qpair is already being disconnected.
    3638             :                          * We do not have to restart disconnecting it.
    3639             :                          */
    3640           0 :                         nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    3641             :                 }
    3642             : 
    3643             :                 /* We cannot release a reference to the poll group now.
    3644             :                  * The qpair may be disconnected asynchronously later.
    3645             :                  * We need to poll it until it is actually disconnected.
    3646             :                  * Just detach the qpair from the deleting ctrlr_channel.
    3647             :                  */
    3648          45 :                 nvme_qpair->ctrlr_ch = NULL;
    3649          45 :         } else {
                                       /* Nothing is connected, so the nvme_qpair can be deleted
                                        * synchronously. No reset iteration is expected here.
                                        */
    3650          14 :                 assert(ctrlr_ch->reset_iter == NULL);
    3651             : 
    3652          14 :                 nvme_qpair_delete(nvme_qpair);
    3653             :         }
    3654          59 : }
    3655             : 
                       /* Return the accel I/O channel of a poll group, obtaining it on first use.
                        *
                        * The channel is cached in group->accel_channel, so
                        * spdk_accel_get_io_channel() is only called while that field is NULL.
                        *
                        * \param group Poll group that owns the cached channel.
                        * \return The accel channel, or NULL (after logging an error) if it
                        * cannot be obtained.
                        */
    3656             : static inline struct spdk_io_channel *
    3657           0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
    3658             : {
    3659           0 :         if (spdk_unlikely(!group->accel_channel)) {
    3660           0 :                 group->accel_channel = spdk_accel_get_io_channel();
    3661           0 :                 if (!group->accel_channel) {
    3662           0 :                         SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
    3663             :                                     group);
    3664           0 :                         return NULL;
    3665             :                 }
    3666           0 :         }
    3667             : 
    3668           0 :         return group->accel_channel;
    3669           0 : }
    3670             : 
                       /* finish_sequence entry of g_bdev_nvme_accel_fn_table: forwards all three
                        * arguments unchanged to spdk_accel_sequence_finish().
                        */
    3671             : static void
    3672           0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
    3673             : {
    3674           0 :         spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
    3675           0 : }
    3676             : 
                       /* abort_sequence entry of g_bdev_nvme_accel_fn_table: forwards seq to
                        * spdk_accel_sequence_abort().
                        */
    3677             : static void
    3678           0 : bdev_nvme_abort_sequence(void *seq)
    3679             : {
    3680           0 :         spdk_accel_sequence_abort(seq);
    3681           0 : }
    3682             : 
                       /* reverse_sequence entry of g_bdev_nvme_accel_fn_table: forwards seq to
                        * spdk_accel_sequence_reverse().
                        */
    3683             : static void
    3684           0 : bdev_nvme_reverse_sequence(void *seq)
    3685             : {
    3686           0 :         spdk_accel_sequence_reverse(seq);
    3687           0 : }
    3688             : 
                       /* append_crc32c entry of g_bdev_nvme_accel_fn_table.
                        *
                        * Resolves the accel channel of the poll group passed as ctx and forwards
                        * the remaining arguments to spdk_accel_append_crc32c().
                        *
                        * \param ctx Pointer to the struct nvme_poll_group.
                        * \param seq Address of the sequence handle; it is cast to
                        * struct spdk_accel_sequence ** before being passed on.
                        * \return -ENOMEM if no accel channel is available, otherwise the return
                        * value of spdk_accel_append_crc32c().
                        */
    3689             : static int
    3690           0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
    3691             :                         struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
    3692             :                         spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
    3693             : {
    3694             :         struct spdk_io_channel *ch;
    3695           0 :         struct nvme_poll_group *group = ctx;
    3696             : 
    3697           0 :         ch = bdev_nvme_get_accel_channel(group);
    3698           0 :         if (spdk_unlikely(ch == NULL)) {
    3699           0 :                 return -ENOMEM;
    3700             :         }
    3701             : 
    3702           0 :         return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
    3703           0 :                                         domain, domain_ctx, seed, cb_fn, cb_arg);
    3704           0 : }
    3705             : 
                       /* append_copy entry of g_bdev_nvme_accel_fn_table.
                        *
                        * Resolves the accel channel of the poll group passed as ctx and forwards
                        * the destination and source descriptions to spdk_accel_append_copy().
                        *
                        * \param ctx Pointer to the struct nvme_poll_group.
                        * \param seq Address of the sequence handle; it is cast to
                        * struct spdk_accel_sequence ** before being passed on.
                        * \return -ENOMEM if no accel channel is available, otherwise the return
                        * value of spdk_accel_append_copy().
                        */
    3706             : static int
    3707           0 : bdev_nvme_append_copy(void *ctx, void **seq, struct iovec *dst_iovs, uint32_t dst_iovcnt,
    3708             :                       struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
    3709             :                       struct iovec *src_iovs, uint32_t src_iovcnt,
    3710             :                       struct spdk_memory_domain *src_domain, void *src_domain_ctx,
    3711             :                       spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
    3712             : {
    3713             :         struct spdk_io_channel *ch;
    3714           0 :         struct nvme_poll_group *group = ctx;
    3715             : 
    3716           0 :         ch = bdev_nvme_get_accel_channel(group);
    3717           0 :         if (spdk_unlikely(ch == NULL)) {
    3718           0 :                 return -ENOMEM;
    3719             :         }
    3720             : 
    3721           0 :         return spdk_accel_append_copy((struct spdk_accel_sequence **)seq, ch,
    3722           0 :                                       dst_iovs, dst_iovcnt, dst_domain, dst_domain_ctx,
    3723           0 :                                       src_iovs, src_iovcnt, src_domain, src_domain_ctx,
    3724           0 :                                       cb_fn, cb_arg);
    3725           0 : }
    3726             : 
                       /* Accel callback table handed to spdk_nvme_poll_group_create() in
                        * bdev_nvme_create_poll_group_cb(). Each entry is one of the thin
                        * wrappers defined above; table_size records the size of the structure.
                        */
    3727             : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
    3728             :         .table_size             = sizeof(struct spdk_nvme_accel_fn_table),
    3729             :         .append_crc32c          = bdev_nvme_append_crc32c,
    3730             :         .append_copy            = bdev_nvme_append_copy,
    3731             :         .finish_sequence        = bdev_nvme_finish_sequence,
    3732             :         .reverse_sequence       = bdev_nvme_reverse_sequence,
    3733             :         .abort_sequence         = bdev_nvme_abort_sequence,
    3734             : };
    3735             : 
                       /* Channel create callback for a poll group.
                        *
                        * Initializes the qpair_list, creates the NVMe poll group with the group
                        * itself as context and g_bdev_nvme_accel_fn_table as accel callbacks, and
                        * registers bdev_nvme_poll as a poller with a period of
                        * g_opts.nvme_ioq_poll_period_us.
                        *
                        * \param io_device Unused.
                        * \param ctx_buf Channel context, used as struct nvme_poll_group.
                        * \return 0 on success, -1 if the poll group or the poller cannot be
                        * created.
                        */
    3736             : static int
    3737          44 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
    3738             : {
    3739          44 :         struct nvme_poll_group *group = ctx_buf;
    3740             : 
    3741          44 :         TAILQ_INIT(&group->qpair_list);
    3742             : 
    3743          44 :         group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
    3744          44 :         if (group->group == NULL) {
    3745           0 :                 return -1;
    3746             :         }
    3747             : 
    3748          44 :         group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
    3749             : 
    3750          44 :         if (group->poller == NULL) {
                                       /* Undo the poll group creation. NOTE(review): the return
                                        * value of the destroy call is ignored here and
                                        * group->group is left pointing at the destroyed group.
                                        */
    3751           0 :                 spdk_nvme_poll_group_destroy(group->group);
    3752           0 :                 return -1;
    3753             :         }
    3754             : 
    3755          44 :         return 0;
    3756          44 : }
    3757             : 
                       /* Channel destroy callback for a poll group.
                        *
                        * Expects the qpair_list to be empty already (asserted). Puts the cached
                        * accel channel if one was obtained, unregisters the poller and destroys
                        * the NVMe poll group. A failure to destroy the poll group is logged and
                        * trips an assert.
                        *
                        * \param io_device Unused.
                        * \param ctx_buf Channel context, used as struct nvme_poll_group.
                        */
    3758             : static void
    3759          44 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
    3760             : {
    3761          44 :         struct nvme_poll_group *group = ctx_buf;
    3762             : 
    3763          44 :         assert(TAILQ_EMPTY(&group->qpair_list));
    3764             : 
                               /* accel_channel is only set by bdev_nvme_get_accel_channel(), so it
                                * may still be NULL here.
                                */
    3765          44 :         if (group->accel_channel) {
    3766           0 :                 spdk_put_io_channel(group->accel_channel);
    3767           0 :         }
    3768             : 
    3769          44 :         spdk_poller_unregister(&group->poller);
    3770          44 :         if (spdk_nvme_poll_group_destroy(group->group)) {
    3771           0 :                 SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
    3772           0 :                 assert(false);
    3773             :         }
    3774          44 : }
    3775             : 
                       /* Get an I/O channel for an NVMe bdev.
                        *
                        * \param ctx Pointer to the struct nvme_bdev; the same pointer is passed
                        * to spdk_get_io_channel() as the io_device.
                        * \return The return value of spdk_get_io_channel().
                        */
    3776             : static struct spdk_io_channel *
    3777           0 : bdev_nvme_get_io_channel(void *ctx)
    3778             : {
    3779           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3780             : 
    3781           0 :         return spdk_get_io_channel(nvme_bdev);
    3782             : }
    3783             : 
                       /* Return the namespace handle behind an NVMe bdev.
                        *
                        * \param ctx Pointer to a struct nvme_bdev, may be NULL.
                        * \return The ns field of the first entry of nvme_ns_list, or NULL if ctx
                        * is NULL, the bdev module is not nvme_if, or the list is empty. The ns
                        * field itself may also be NULL; this function does not check it.
                        */
    3784             : static void *
    3785           0 : bdev_nvme_get_module_ctx(void *ctx)
    3786             : {
    3787           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3788             :         struct nvme_ns *nvme_ns;
    3789             : 
    3790           0 :         if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
    3791           0 :                 return NULL;
    3792             :         }
    3793             : 
    3794           0 :         nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
    3795           0 :         if (!nvme_ns) {
    3796           0 :                 return NULL;
    3797             :         }
    3798             : 
    3799           0 :         return nvme_ns->ns;
    3800           0 : }
    3801             : 
                       /* Map an ANA state to its lower-case string name.
                        *
                        * \param ana_state ANA state to translate.
                        * \return A string literal for the five known states, or NULL for any
                        * other value. Callers must be prepared for the NULL return.
                        */
    3802             : static const char *
    3803           0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
    3804             : {
    3805           0 :         switch (ana_state) {
    3806             :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    3807           0 :                 return "optimized";
    3808             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    3809           0 :                 return "non_optimized";
    3810             :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    3811           0 :                 return "inaccessible";
    3812             :         case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
    3813           0 :                 return "persistent_loss";
    3814             :         case SPDK_NVME_ANA_CHANGE_STATE:
    3815           0 :                 return "change";
    3816             :         default:
    3817           0 :                 return NULL;
    3818             :         }
    3819           0 : }
    3820             : 
                       /* Collect the memory domains of every controller behind an NVMe bdev.
                        *
                        * For each entry of nbdev->nvme_ns_list the function calls
                        * spdk_nvme_ctrlr_get_memory_domains() on the controller of that entry,
                        * passing the not yet used tail of the domains array and the remaining
                        * capacity.
                        *
                        * \param ctx Pointer to the struct nvme_bdev.
                        * \param domains Output array, may be NULL.
                        * \param array_size Number of elements in domains.
                        * \return The sum of all positive per-controller return values, which can
                        * be larger than array_size, or the first negative per-controller return
                        * value.
                        */
    3821             : static int
    3822           8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
    3823             : {
    3824           8 :         struct spdk_memory_domain **_domains = NULL;
    3825           8 :         struct nvme_bdev *nbdev = ctx;
    3826             :         struct nvme_ns *nvme_ns;
    3827           8 :         int i = 0, _array_size = array_size;
    3828           8 :         int rc = 0;
    3829             : 
    3830          22 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
                                       /* i counts the domains reported so far. When i equals
                                        * array_size the address taken below is one past the end
                                        * of the array; for a non-negative array_size the
                                        * remaining capacity _array_size is 0 at that point.
                                        */
    3831          14 :                 if (domains && array_size >= i) {
    3832          11 :                         _domains = &domains[i];
    3833          11 :                 } else {
    3834           3 :                         _domains = NULL;
    3835             :                 }
    3836          14 :                 rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
    3837          14 :                 if (rc > 0) {
    3838          13 :                         i += rc;
                                               /* Shrink the remaining capacity, clamping at 0
                                                * when more domains were reported than fit.
                                                */
    3839          13 :                         if (_array_size >= rc) {
    3840           9 :                                 _array_size -= rc;
    3841           9 :                         } else {
    3842           4 :                                 _array_size = 0;
    3843             :                         }
    3844          14 :                 } else if (rc < 0) {
    3845           0 :                         return rc;
    3846             :                 }
    3847          14 :         }
    3848             : 
    3849           8 :         return i;
    3850           8 : }
    3851             : 
                       /* Return a string describing the current state of a controller.
                        *
                        * The checks are ordered, so the first matching condition wins:
                        * "deleting", "failed", "resetting", "reconnect_is_delayed", "disabled",
                        * and finally "enabled" when none of the others applies.
                        *
                        * \param nvme_ctrlr Controller to describe.
                        * \return A string literal; never NULL.
                        */
    3852             : static const char *
    3853           0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
    3854             : {
    3855           0 :         if (nvme_ctrlr->destruct) {
    3856           0 :                 return "deleting";
    3857           0 :         } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    3858           0 :                 return "failed";
    3859           0 :         } else if (nvme_ctrlr->resetting) {
    3860           0 :                 return "resetting";
    3861           0 :         } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
    3862           0 :                 return "reconnect_is_delayed";
    3863           0 :         } else if (nvme_ctrlr->disabled) {
    3864           0 :                 return "disabled";
    3865             :         } else {
    3866           0 :                 return "enabled";
    3867             :         }
    3868           0 : }
    3869             : 
                       /* Write a JSON object describing one controller.
                        *
                        * The object contains, in this order: "state", "cuse_device" (only when
                        * built with SPDK_CONFIG_NVME_CUSE and the name lookup returns 0), "trid"
                        * for the active path, "alternate_trids" (only when another path follows
                        * the active one in the list), "cntlid", a "host" object with "nqn",
                        * "addr" and "svcid", and "numa_id" (only when it differs from
                        * SPDK_ENV_NUMA_ID_ANY).
                        *
                        * \param w JSON write context to emit into.
                        * \param nvme_ctrlr Controller to describe.
                        */
    3870             : void
    3871           0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
    3872             : {
    3873             :         struct spdk_nvme_transport_id *trid;
    3874             :         const struct spdk_nvme_ctrlr_opts *opts;
    3875             :         const struct spdk_nvme_ctrlr_data *cdata;
    3876             :         struct nvme_path_id *path_id;
    3877             :         int32_t numa_id;
    3878             : 
    3879           0 :         spdk_json_write_object_begin(w);
    3880             : 
    3881           0 :         spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
    3882             : 
    3883             : #ifdef SPDK_CONFIG_NVME_CUSE
    3884             :         size_t cuse_name_size = 128;
    3885             :         char cuse_name[cuse_name_size];
    3886             : 
    3887             :         int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
    3888             :         if (rc == 0) {
    3889             :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    3890             :         }
    3891             : #endif
    3892           0 :         trid = &nvme_ctrlr->active_path_id->trid;
    3893           0 :         spdk_json_write_named_object_begin(w, "trid");
    3894           0 :         nvme_bdev_dump_trid_json(trid, w);
    3895           0 :         spdk_json_write_object_end(w);
    3896             : 
                               /* Only path ids that follow active_path_id in the list are
                                * emitted. NOTE(review): this presumes the active path is the
                                * list head; entries before it would be omitted.
                                */
    3897           0 :         path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
    3898           0 :         if (path_id != NULL) {
    3899           0 :                 spdk_json_write_named_array_begin(w, "alternate_trids");
    3900           0 :                 do {
    3901           0 :                         trid = &path_id->trid;
    3902           0 :                         spdk_json_write_object_begin(w);
    3903           0 :                         nvme_bdev_dump_trid_json(trid, w);
    3904           0 :                         spdk_json_write_object_end(w);
    3905             : 
    3906           0 :                         path_id = TAILQ_NEXT(path_id, link);
    3907           0 :                 } while (path_id != NULL);
    3908           0 :                 spdk_json_write_array_end(w);
    3909           0 :         }
    3910             : 
    3911           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    3912           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    3913             : 
    3914           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    3915           0 :         spdk_json_write_named_object_begin(w, "host");
    3916           0 :         spdk_json_write_named_string(w, "nqn", opts->hostnqn);
    3917           0 :         spdk_json_write_named_string(w, "addr", opts->src_addr);
    3918           0 :         spdk_json_write_named_string(w, "svcid", opts->src_svcid);
    3919           0 :         spdk_json_write_object_end(w);
    3920             : 
                               /* NOTE(review): numa_id is an int32_t written through the uint32
                                * writer; confirm it cannot be negative once it differs from
                                * SPDK_ENV_NUMA_ID_ANY.
                                */
    3921           0 :         numa_id = spdk_nvme_ctrlr_get_numa_id(nvme_ctrlr->ctrlr);
    3922           0 :         if (numa_id != SPDK_ENV_NUMA_ID_ANY) {
    3923           0 :                 spdk_json_write_named_uint32(w, "numa_id", numa_id);
    3924           0 :         }
    3925           0 :         spdk_json_write_object_end(w);
    3926           0 : }
    3927             : 
    3928             : static void
    3929           0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
    3930             :                          struct nvme_ns *nvme_ns)
    3931             : {
    3932             :         struct spdk_nvme_ns *ns;
    3933             :         struct spdk_nvme_ctrlr *ctrlr;
    3934             :         const struct spdk_nvme_ctrlr_data *cdata;
    3935             :         const struct spdk_nvme_transport_id *trid;
    3936             :         union spdk_nvme_vs_register vs;
    3937             :         const struct spdk_nvme_ns_data *nsdata;
    3938             :         char buf[128];
    3939             : 
    3940           0 :         ns = nvme_ns->ns;
    3941           0 :         if (ns == NULL) {
    3942           0 :                 return;
    3943             :         }
    3944             : 
    3945           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    3946             : 
    3947           0 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3948           0 :         trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
    3949           0 :         vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
    3950             : 
    3951           0 :         spdk_json_write_object_begin(w);
    3952             : 
    3953           0 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    3954           0 :                 spdk_json_write_named_string(w, "pci_address", trid->traddr);
    3955           0 :         }
    3956             : 
    3957           0 :         spdk_json_write_named_object_begin(w, "trid");
    3958             : 
    3959           0 :         nvme_bdev_dump_trid_json(trid, w);
    3960             : 
    3961           0 :         spdk_json_write_object_end(w);
    3962             : 
    3963             : #ifdef SPDK_CONFIG_NVME_CUSE
    3964             :         size_t cuse_name_size = 128;
    3965             :         char cuse_name[cuse_name_size];
    3966             : 
    3967             :         int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
    3968             :                                             cuse_name, &cuse_name_size);
    3969             :         if (rc == 0) {
    3970             :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    3971             :         }
    3972             : #endif
    3973             : 
    3974           0 :         spdk_json_write_named_object_begin(w, "ctrlr_data");
    3975             : 
    3976           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    3977             : 
    3978           0 :         spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
    3979             : 
    3980           0 :         snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
    3981           0 :         spdk_str_trim(buf);
    3982           0 :         spdk_json_write_named_string(w, "model_number", buf);
    3983             : 
    3984           0 :         snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
    3985           0 :         spdk_str_trim(buf);
    3986           0 :         spdk_json_write_named_string(w, "serial_number", buf);
    3987             : 
    3988           0 :         snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
    3989           0 :         spdk_str_trim(buf);
    3990           0 :         spdk_json_write_named_string(w, "firmware_revision", buf);
    3991             : 
    3992           0 :         if (cdata->subnqn[0] != '\0') {
    3993           0 :                 spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
    3994           0 :         }
    3995             : 
    3996           0 :         spdk_json_write_named_object_begin(w, "oacs");
    3997             : 
    3998           0 :         spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
    3999           0 :         spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
    4000           0 :         spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
    4001           0 :         spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
    4002             : 
    4003           0 :         spdk_json_write_object_end(w);
    4004             : 
    4005           0 :         spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
    4006           0 :         spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
    4007             : 
    4008           0 :         spdk_json_write_object_end(w);
    4009             : 
    4010           0 :         spdk_json_write_named_object_begin(w, "vs");
    4011             : 
    4012           0 :         spdk_json_write_name(w, "nvme_version");
    4013           0 :         if (vs.bits.ter) {
    4014           0 :                 spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
    4015           0 :         } else {
    4016           0 :                 spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
    4017             :         }
    4018             : 
    4019           0 :         spdk_json_write_object_end(w);
    4020             : 
    4021           0 :         nsdata = spdk_nvme_ns_get_data(ns);
    4022             : 
    4023           0 :         spdk_json_write_named_object_begin(w, "ns_data");
    4024             : 
    4025           0 :         spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
    4026             : 
    4027           0 :         if (cdata->cmic.ana_reporting) {
    4028           0 :                 spdk_json_write_named_string(w, "ana_state",
    4029           0 :                                              _nvme_ana_state_str(nvme_ns->ana_state));
    4030           0 :         }
    4031             : 
    4032           0 :         spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
    4033             : 
    4034           0 :         spdk_json_write_object_end(w);
    4035             : 
    4036           0 :         if (cdata->oacs.security) {
    4037           0 :                 spdk_json_write_named_object_begin(w, "security");
    4038             : 
    4039           0 :                 spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
    4040             : 
    4041           0 :                 spdk_json_write_object_end(w);
    4042           0 :         }
    4043             : 
    4044           0 :         spdk_json_write_object_end(w);
    4045           0 : }
    4046             : 
    4047             : static const char *
    4048           0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
    4049             : {
    4050           0 :         switch (nbdev->mp_policy) {
    4051             :         case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
    4052           0 :                 return "active_passive";
    4053             :         case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
    4054           0 :                 return "active_active";
    4055             :         default:
    4056           0 :                 assert(false);
    4057             :                 return "invalid";
    4058             :         }
    4059           0 : }
    4060             : 
    4061             : static const char *
    4062           0 : nvme_bdev_get_mp_selector_str(struct nvme_bdev *nbdev)
    4063             : {
    4064           0 :         switch (nbdev->mp_selector) {
    4065             :         case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
    4066           0 :                 return "round_robin";
    4067             :         case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
    4068           0 :                 return "queue_depth";
    4069             :         default:
    4070           0 :                 assert(false);
    4071             :                 return "invalid";
    4072             :         }
    4073           0 : }
    4074             : 
    4075             : static int
    4076           0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
    4077             : {
    4078           0 :         struct nvme_bdev *nvme_bdev = ctx;
    4079             :         struct nvme_ns *nvme_ns;
    4080             : 
    4081           0 :         pthread_mutex_lock(&nvme_bdev->mutex);
    4082           0 :         spdk_json_write_named_array_begin(w, "nvme");
    4083           0 :         TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
    4084           0 :                 nvme_namespace_info_json(w, nvme_ns);
    4085           0 :         }
    4086           0 :         spdk_json_write_array_end(w);
    4087           0 :         spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
    4088           0 :         if (nvme_bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    4089           0 :                 spdk_json_write_named_string(w, "selector", nvme_bdev_get_mp_selector_str(nvme_bdev));
    4090           0 :                 if (nvme_bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    4091           0 :                         spdk_json_write_named_uint32(w, "rr_min_io", nvme_bdev->rr_min_io);
    4092           0 :                 }
    4093           0 :         }
    4094           0 :         pthread_mutex_unlock(&nvme_bdev->mutex);
    4095             : 
    4096           0 :         return 0;
    4097             : }
    4098             : 
    4099             : static void
    4100           0 : bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
    4101             : {
    4102             :         /* No config per bdev needed */
    4103           0 : }
    4104             : 
    4105             : static uint64_t
    4106           0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
    4107             : {
    4108           0 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    4109             :         struct nvme_io_path *io_path;
    4110             :         struct nvme_poll_group *group;
    4111           0 :         uint64_t spin_time = 0;
    4112             : 
    4113           0 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    4114           0 :                 group = io_path->qpair->group;
    4115             : 
    4116           0 :                 if (!group || !group->collect_spin_stat) {
    4117           0 :                         continue;
    4118             :                 }
    4119             : 
    4120           0 :                 if (group->end_ticks != 0) {
    4121           0 :                         group->spin_ticks += (group->end_ticks - group->start_ticks);
    4122           0 :                         group->end_ticks = 0;
    4123           0 :                 }
    4124             : 
    4125           0 :                 spin_time += group->spin_ticks;
    4126           0 :                 group->start_ticks = 0;
    4127           0 :                 group->spin_ticks = 0;
    4128           0 :         }
    4129             : 
    4130           0 :         return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
    4131             : }
    4132             : 
    4133             : static void
    4134           0 : bdev_nvme_reset_device_stat(void *ctx)
    4135             : {
    4136           0 :         struct nvme_bdev *nbdev = ctx;
    4137             : 
    4138           0 :         if (nbdev->err_stat != NULL) {
    4139           0 :                 memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
    4140           0 :         }
    4141           0 : }
    4142             : 
    4143             : /* JSON string should be lowercases and underscore delimited string. */
    4144             : static void
    4145           0 : bdev_nvme_format_nvme_status(char *dst, const char *src)
    4146             : {
    4147             :         char tmp[256];
    4148             : 
    4149           0 :         spdk_strcpy_replace(dst, 256, src, " - ", "_");
    4150           0 :         spdk_strcpy_replace(tmp, 256, dst, "-", "_");
    4151           0 :         spdk_strcpy_replace(dst, 256, tmp, " ", "_");
    4152           0 :         spdk_strlwr(dst);
    4153           0 : }
    4154             : 
    4155             : static void
    4156           0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
    4157             : {
    4158           0 :         struct nvme_bdev *nbdev = ctx;
    4159           0 :         struct spdk_nvme_status status = {};
    4160             :         uint16_t sct, sc;
    4161             :         char status_json[256];
    4162             :         const char *status_str;
    4163             : 
    4164           0 :         if (nbdev->err_stat == NULL) {
    4165           0 :                 return;
    4166             :         }
    4167             : 
    4168           0 :         spdk_json_write_named_object_begin(w, "nvme_error");
    4169             : 
    4170           0 :         spdk_json_write_named_object_begin(w, "status_type");
    4171           0 :         for (sct = 0; sct < 8; sct++) {
    4172           0 :                 if (nbdev->err_stat->status_type[sct] == 0) {
    4173           0 :                         continue;
    4174             :                 }
    4175           0 :                 status.sct = sct;
    4176             : 
    4177           0 :                 status_str = spdk_nvme_cpl_get_status_type_string(&status);
    4178           0 :                 assert(status_str != NULL);
    4179           0 :                 bdev_nvme_format_nvme_status(status_json, status_str);
    4180             : 
    4181           0 :                 spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
    4182           0 :         }
    4183           0 :         spdk_json_write_object_end(w);
    4184             : 
    4185           0 :         spdk_json_write_named_object_begin(w, "status_code");
    4186           0 :         for (sct = 0; sct < 4; sct++) {
    4187           0 :                 status.sct = sct;
    4188           0 :                 for (sc = 0; sc < 256; sc++) {
    4189           0 :                         if (nbdev->err_stat->status[sct][sc] == 0) {
    4190           0 :                                 continue;
    4191             :                         }
    4192           0 :                         status.sc = sc;
    4193             : 
    4194           0 :                         status_str = spdk_nvme_cpl_get_status_string(&status);
    4195           0 :                         assert(status_str != NULL);
    4196           0 :                         bdev_nvme_format_nvme_status(status_json, status_str);
    4197             : 
    4198           0 :                         spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
    4199           0 :                 }
    4200           0 :         }
    4201           0 :         spdk_json_write_object_end(w);
    4202             : 
    4203           0 :         spdk_json_write_object_end(w);
    4204           0 : }
    4205             : 
    4206             : static bool
    4207           0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
    4208             : {
    4209           0 :         struct nvme_bdev *nbdev = ctx;
    4210             :         struct spdk_nvme_ctrlr *ctrlr;
    4211             : 
    4212           0 :         if (!g_opts.allow_accel_sequence) {
    4213           0 :                 return false;
    4214             :         }
    4215             : 
    4216           0 :         switch (type) {
    4217             :         case SPDK_BDEV_IO_TYPE_WRITE:
    4218             :         case SPDK_BDEV_IO_TYPE_READ:
    4219           0 :                 break;
    4220             :         default:
    4221           0 :                 return false;
    4222             :         }
    4223             : 
    4224           0 :         ctrlr = bdev_nvme_get_ctrlr(&nbdev->disk);
    4225           0 :         assert(ctrlr != NULL);
    4226             : 
    4227           0 :         return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
    4228           0 : }
    4229             : 
    4230             : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
    4231             :         .destruct                       = bdev_nvme_destruct,
    4232             :         .submit_request                 = bdev_nvme_submit_request,
    4233             :         .io_type_supported              = bdev_nvme_io_type_supported,
    4234             :         .get_io_channel                 = bdev_nvme_get_io_channel,
    4235             :         .dump_info_json                 = bdev_nvme_dump_info_json,
    4236             :         .write_config_json              = bdev_nvme_write_config_json,
    4237             :         .get_spin_time                  = bdev_nvme_get_spin_time,
    4238             :         .get_module_ctx                 = bdev_nvme_get_module_ctx,
    4239             :         .get_memory_domains             = bdev_nvme_get_memory_domains,
    4240             :         .accel_sequence_supported       = bdev_nvme_accel_sequence_supported,
    4241             :         .reset_device_stat              = bdev_nvme_reset_device_stat,
    4242             :         .dump_device_stat_json          = bdev_nvme_dump_device_stat_json,
    4243             : };
    4244             : 
    4245             : typedef int (*bdev_nvme_parse_ana_log_page_cb)(
    4246             :         const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);
    4247             : 
    4248             : static int
    4249          41 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    4250             :                              bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
    4251             : {
    4252             :         struct spdk_nvme_ana_group_descriptor *copied_desc;
    4253             :         uint8_t *orig_desc;
    4254             :         uint32_t i, desc_size, copy_len;
    4255          41 :         int rc = 0;
    4256             : 
    4257          41 :         if (nvme_ctrlr->ana_log_page == NULL) {
    4258           0 :                 return -EINVAL;
    4259             :         }
    4260             : 
    4261          41 :         copied_desc = nvme_ctrlr->copied_ana_desc;
    4262             : 
    4263          41 :         orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
    4264          41 :         copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
    4265             : 
    4266          71 :         for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
    4267          66 :                 memcpy(copied_desc, orig_desc, copy_len);
    4268             : 
    4269          66 :                 rc = cb_fn(copied_desc, cb_arg);
    4270          66 :                 if (rc != 0) {
    4271          36 :                         break;
    4272             :                 }
    4273             : 
    4274          30 :                 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
    4275          30 :                             copied_desc->num_of_nsid * sizeof(uint32_t);
    4276          30 :                 orig_desc += desc_size;
    4277          30 :                 copy_len -= desc_size;
    4278          30 :         }
    4279             : 
    4280          41 :         return rc;
    4281          41 : }
    4282             : 
    4283             : static int
    4284           5 : nvme_ns_ana_transition_timedout(void *ctx)
    4285             : {
    4286           5 :         struct nvme_ns *nvme_ns = ctx;
    4287             : 
    4288           5 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    4289           5 :         nvme_ns->ana_transition_timedout = true;
    4290             : 
    4291           5 :         return SPDK_POLLER_BUSY;
    4292             : }
    4293             : 
    4294             : static void
    4295          45 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
    4296             :                        const struct spdk_nvme_ana_group_descriptor *desc)
    4297             : {
    4298             :         const struct spdk_nvme_ctrlr_data *cdata;
    4299             : 
    4300          45 :         nvme_ns->ana_group_id = desc->ana_group_id;
    4301          45 :         nvme_ns->ana_state = desc->ana_state;
    4302          45 :         nvme_ns->ana_state_updating = false;
    4303             : 
    4304          45 :         switch (nvme_ns->ana_state) {
    4305             :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    4306             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    4307          38 :                 nvme_ns->ana_transition_timedout = false;
    4308          38 :                 spdk_poller_unregister(&nvme_ns->anatt_timer);
    4309          38 :                 break;
    4310             : 
    4311             :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    4312             :         case SPDK_NVME_ANA_CHANGE_STATE:
    4313           6 :                 if (nvme_ns->anatt_timer != NULL) {
    4314           1 :                         break;
    4315             :                 }
    4316             : 
    4317           5 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    4318           5 :                 nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
    4319             :                                        nvme_ns,
    4320             :                                        cdata->anatt * SPDK_SEC_TO_USEC);
    4321           5 :                 break;
    4322             :         default:
    4323           1 :                 break;
    4324             :         }
    4325          45 : }
    4326             : 
    4327             : static int
    4328          59 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
    4329             : {
    4330          59 :         struct nvme_ns *nvme_ns = cb_arg;
    4331             :         uint32_t i;
    4332             : 
    4333          59 :         assert(nvme_ns->ns != NULL);
    4334             : 
    4335          81 :         for (i = 0; i < desc->num_of_nsid; i++) {
    4336          58 :                 if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
    4337          22 :                         continue;
    4338             :                 }
    4339             : 
    4340          36 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    4341          36 :                 return 1;
    4342             :         }
    4343             : 
    4344          23 :         return 0;
    4345          59 : }
    4346             : 
    4347             : static int
    4348           5 : nvme_generate_uuid(const char *sn, uint32_t nsid, struct spdk_uuid *uuid)
    4349             : {
    4350           5 :         int rc = 0;
    4351             :         struct spdk_uuid new_uuid, namespace_uuid;
    4352           5 :         char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
    4353             :         /* This namespace UUID was generated using uuid_generate() method. */
    4354           5 :         const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
    4355             :         int size;
    4356             : 
    4357           5 :         assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
    4358             : 
    4359           5 :         spdk_uuid_set_null(&new_uuid);
    4360           5 :         spdk_uuid_set_null(&namespace_uuid);
    4361             : 
    4362           5 :         size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
    4363           5 :         if (size <= 0 || (unsigned long)size >= sizeof(merged_str)) {
    4364           0 :                 return -EINVAL;
    4365             :         }
    4366             : 
    4367           5 :         spdk_uuid_parse(&namespace_uuid, namespace_str);
    4368             : 
    4369           5 :         rc = spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
    4370           5 :         if (rc == 0) {
    4371           5 :                 memcpy(uuid, &new_uuid, sizeof(struct spdk_uuid));
    4372           5 :         }
    4373             : 
    4374           5 :         return rc;
    4375           5 : }
    4376             : 
    4377             : static int
    4378          38 : nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
    4379             :                  struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
    4380             :                  struct spdk_bdev_nvme_ctrlr_opts *bdev_opts, void *ctx)
    4381             : {
    4382             :         const struct spdk_uuid          *uuid;
    4383             :         const uint8_t *nguid;
    4384             :         const struct spdk_nvme_ctrlr_data *cdata;
    4385             :         const struct spdk_nvme_ns_data  *nsdata;
    4386             :         const struct spdk_nvme_ctrlr_opts *opts;
    4387             :         enum spdk_nvme_csi              csi;
    4388             :         uint32_t atomic_bs, phys_bs, bs;
    4389          38 :         char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
    4390             :         int rc;
    4391             : 
    4392          38 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    4393          38 :         csi = spdk_nvme_ns_get_csi(ns);
    4394          38 :         opts = spdk_nvme_ctrlr_get_opts(ctrlr);
    4395             : 
    4396          38 :         switch (csi) {
    4397             :         case SPDK_NVME_CSI_NVM:
    4398          38 :                 disk->product_name = "NVMe disk";
    4399          38 :                 break;
    4400             :         case SPDK_NVME_CSI_ZNS:
    4401           0 :                 disk->product_name = "NVMe ZNS disk";
    4402           0 :                 disk->zoned = true;
    4403           0 :                 disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    4404           0 :                 disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
    4405           0 :                                              spdk_nvme_ns_get_extended_sector_size(ns);
    4406           0 :                 disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
    4407           0 :                 disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
    4408           0 :                 break;
    4409             :         default:
    4410           0 :                 if (bdev_opts->allow_unrecognized_csi) {
    4411           0 :                         disk->product_name = "NVMe Passthrough disk";
    4412           0 :                         break;
    4413             :                 }
    4414           0 :                 SPDK_ERRLOG("unsupported CSI: %u\n", csi);
    4415           0 :                 return -ENOTSUP;
    4416             :         }
    4417             : 
    4418          38 :         nguid = spdk_nvme_ns_get_nguid(ns);
    4419          38 :         if (!nguid) {
    4420          38 :                 uuid = spdk_nvme_ns_get_uuid(ns);
    4421          38 :                 if (uuid) {
    4422          12 :                         disk->uuid = *uuid;
    4423          38 :                 } else if (g_opts.generate_uuids) {
    4424           0 :                         spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
    4425           0 :                         rc = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns), &disk->uuid);
    4426           0 :                         if (rc < 0) {
    4427           0 :                                 SPDK_ERRLOG("UUID generation failed (%s)\n", spdk_strerror(-rc));
    4428           0 :                                 return rc;
    4429             :                         }
    4430           0 :                 }
    4431          38 :         } else {
    4432           0 :                 memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
    4433             :         }
    4434             : 
    4435          38 :         disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
    4436          38 :         if (!disk->name) {
    4437           0 :                 return -ENOMEM;
    4438             :         }
    4439             : 
    4440          38 :         disk->write_cache = 0;
    4441          38 :         if (cdata->vwc.present) {
    4442             :                 /* Enable if the Volatile Write Cache exists */
    4443           0 :                 disk->write_cache = 1;
    4444           0 :         }
    4445          38 :         if (cdata->oncs.write_zeroes) {
    4446           0 :                 disk->max_write_zeroes = UINT16_MAX + 1;
    4447           0 :         }
    4448          38 :         disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
    4449          38 :         disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
    4450          38 :         disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
    4451          38 :         disk->ctratt.raw = cdata->ctratt.raw;
    4452             :         /* NVMe driver will split one request into multiple requests
    4453             :          * based on MDTS and stripe boundary, the bdev layer will use
    4454             :          * max_segment_size and max_num_segments to split one big IO
    4455             :          * into multiple requests, then small request can't run out
    4456             :          * of NVMe internal requests data structure.
    4457             :          */
    4458          38 :         if (opts && opts->io_queue_requests) {
    4459           0 :                 disk->max_num_segments = opts->io_queue_requests / 2;
    4460           0 :         }
    4461          38 :         if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
    4462             :                 /* The nvme driver will try to split I/O that have too many
    4463             :                  * SGEs, but it doesn't work if that last SGE doesn't end on
    4464             :                  * an aggregate total that is block aligned. The bdev layer has
    4465             :                  * a more robust splitting framework, so use that instead for
    4466             :                  * this case. (See issue #3269.)
    4467             :                  */
    4468           0 :                 uint16_t max_sges = spdk_nvme_ctrlr_get_max_sges(ctrlr);
    4469             : 
    4470           0 :                 if (disk->max_num_segments == 0) {
    4471           0 :                         disk->max_num_segments = max_sges;
    4472           0 :                 } else {
    4473           0 :                         disk->max_num_segments = spdk_min(disk->max_num_segments, max_sges);
    4474             :                 }
    4475           0 :         }
    4476          38 :         disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
    4477             : 
    4478          38 :         nsdata = spdk_nvme_ns_get_data(ns);
    4479          38 :         bs = spdk_nvme_ns_get_sector_size(ns);
    4480          38 :         atomic_bs = bs;
    4481          38 :         phys_bs = bs;
    4482          38 :         if (nsdata->nabo == 0) {
    4483          38 :                 if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
    4484           0 :                         atomic_bs = bs * (1 + nsdata->nawupf);
    4485           0 :                 } else {
    4486          38 :                         atomic_bs = bs * (1 + cdata->awupf);
    4487             :                 }
    4488          38 :         }
    4489          38 :         if (nsdata->nsfeat.optperf) {
    4490           0 :                 phys_bs = bs * (1 + nsdata->npwg);
    4491           0 :         }
    4492          38 :         disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
    4493             : 
    4494          38 :         disk->md_len = spdk_nvme_ns_get_md_size(ns);
    4495          38 :         if (disk->md_len != 0) {
    4496           0 :                 disk->md_interleave = nsdata->flbas.extended;
    4497           0 :                 disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
    4498           0 :                 if (disk->dif_type != SPDK_DIF_DISABLE) {
    4499           0 :                         disk->dif_is_head_of_md = nsdata->dps.md_start;
    4500           0 :                         disk->dif_check_flags = bdev_opts->prchk_flags;
    4501           0 :                         disk->dif_pi_format = (enum spdk_dif_pi_format)spdk_nvme_ns_get_pi_format(ns);
    4502           0 :                 }
    4503           0 :         }
    4504             : 
    4505          38 :         if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
    4506             :               SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
    4507          38 :                 disk->acwu = 0;
    4508          38 :         } else if (nsdata->nsfeat.ns_atomic_write_unit) {
    4509           0 :                 disk->acwu = nsdata->nacwu + 1; /* 0-based */
    4510           0 :         } else {
    4511           0 :                 disk->acwu = cdata->acwu + 1; /* 0-based */
    4512             :         }
    4513             : 
    4514          38 :         if (cdata->oncs.copy) {
    4515             :                 /* For now bdev interface allows only single segment copy */
    4516           0 :                 disk->max_copy = nsdata->mssrl;
    4517           0 :         }
    4518             : 
    4519          38 :         disk->ctxt = ctx;
    4520          38 :         disk->fn_table = &nvmelib_fn_table;
    4521          38 :         disk->module = &nvme_if;
    4522             : 
    4523          38 :         disk->numa.id_valid = 1;
    4524          38 :         disk->numa.id = spdk_nvme_ctrlr_get_numa_id(ctrlr);
    4525             : 
    4526          38 :         return 0;
    4527          38 : }
    4528             : 
    4529             : static struct nvme_bdev *
    4530          38 : nvme_bdev_alloc(void)
    4531             : {
    4532             :         struct nvme_bdev *bdev;
    4533             :         int rc;
    4534             : 
    4535          38 :         bdev = calloc(1, sizeof(*bdev));
    4536          38 :         if (!bdev) {
    4537           0 :                 SPDK_ERRLOG("bdev calloc() failed\n");
    4538           0 :                 return NULL;
    4539             :         }
    4540             : 
    4541          38 :         if (g_opts.nvme_error_stat) {
    4542           0 :                 bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
    4543           0 :                 if (!bdev->err_stat) {
    4544           0 :                         SPDK_ERRLOG("err_stat calloc() failed\n");
    4545           0 :                         free(bdev);
    4546           0 :                         return NULL;
    4547             :                 }
    4548           0 :         }
    4549             : 
    4550          38 :         rc = pthread_mutex_init(&bdev->mutex, NULL);
    4551          38 :         if (rc != 0) {
    4552           0 :                 free(bdev->err_stat);
    4553           0 :                 free(bdev);
    4554           0 :                 return NULL;
    4555             :         }
    4556             : 
    4557          38 :         bdev->ref = 1;
    4558          38 :         bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
    4559          38 :         bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
    4560          38 :         bdev->rr_min_io = UINT32_MAX;
    4561          38 :         TAILQ_INIT(&bdev->nvme_ns_list);
    4562             : 
    4563          38 :         return bdev;
    4564          38 : }
    4565             : 
    4566             : static int
    4567          38 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4568             : {
    4569             :         struct nvme_bdev *bdev;
    4570          38 :         struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
    4571             :         int rc;
    4572             : 
    4573          38 :         bdev = nvme_bdev_alloc();
    4574          38 :         if (bdev == NULL) {
    4575           0 :                 SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
    4576           0 :                 return -ENOMEM;
    4577             :         }
    4578             : 
    4579          38 :         bdev->opal = nvme_ctrlr->opal_dev != NULL;
    4580             : 
    4581          76 :         rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
    4582          38 :                               nvme_ns->ns, &nvme_ctrlr->opts, bdev);
    4583          38 :         if (rc != 0) {
    4584           0 :                 SPDK_ERRLOG("Failed to create NVMe disk\n");
    4585           0 :                 nvme_bdev_free(bdev);
    4586           0 :                 return rc;
    4587             :         }
    4588             : 
    4589          76 :         spdk_io_device_register(bdev,
    4590             :                                 bdev_nvme_create_bdev_channel_cb,
    4591             :                                 bdev_nvme_destroy_bdev_channel_cb,
    4592             :                                 sizeof(struct nvme_bdev_channel),
    4593          38 :                                 bdev->disk.name);
    4594             : 
    4595          38 :         nvme_ns->bdev = bdev;
    4596          38 :         bdev->nsid = nvme_ns->id;
    4597          38 :         TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
    4598             : 
    4599          38 :         bdev->nbdev_ctrlr = nbdev_ctrlr;
    4600          38 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);
    4601             : 
    4602          38 :         rc = spdk_bdev_register(&bdev->disk);
    4603          38 :         if (rc != 0) {
    4604           1 :                 SPDK_ERRLOG("spdk_bdev_register() failed\n");
    4605           1 :                 spdk_io_device_unregister(bdev, NULL);
    4606           1 :                 nvme_ns->bdev = NULL;
    4607           1 :                 TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
    4608           1 :                 nvme_bdev_free(bdev);
    4609           1 :                 return rc;
    4610             :         }
    4611             : 
    4612          37 :         return 0;
    4613          38 : }
    4614             : 
    4615             : static bool
    4616          23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
    4617             : {
    4618             :         const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
    4619             :         const struct spdk_uuid *uuid1, *uuid2;
    4620             : 
    4621          23 :         nsdata1 = spdk_nvme_ns_get_data(ns1);
    4622          23 :         nsdata2 = spdk_nvme_ns_get_data(ns2);
    4623          23 :         uuid1 = spdk_nvme_ns_get_uuid(ns1);
    4624          23 :         uuid2 = spdk_nvme_ns_get_uuid(ns2);
    4625             : 
    4626          71 :         return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
    4627          22 :                nsdata1->eui64 == nsdata2->eui64 &&
    4628          21 :                ((uuid1 == NULL && uuid2 == NULL) ||
    4629          29 :                 (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
    4630          18 :                spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
    4631             : }
    4632             : 
    4633             : static bool
    4634           0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    4635             :                  struct spdk_nvme_ctrlr_opts *opts)
    4636             : {
    4637             :         struct nvme_probe_skip_entry *entry;
    4638             : 
    4639           0 :         TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
    4640           0 :                 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    4641           0 :                         return false;
    4642             :                 }
    4643           0 :         }
    4644             : 
    4645           0 :         opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
    4646           0 :         opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
    4647           0 :         opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
    4648           0 :         opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
    4649           0 :         opts->disable_read_ana_log_page = true;
    4650             : 
    4651           0 :         SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
    4652             : 
    4653           0 :         return true;
    4654           0 : }
    4655             : 
    4656             : static void
    4657           0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
    4658             : {
    4659           0 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    4660             : 
    4661           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    4662           0 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr, "Abort failed. Resetting controller. sc is %u, sct is %u.\n",
    4663             :                                    cpl->status.sc, cpl->status.sct);
    4664           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4665           0 :         } else if (cpl->cdw0 & 0x1) {
    4666           0 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr, "Specified command could not be aborted.\n");
    4667           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4668           0 :         }
    4669           0 : }
    4670             : 
    4671             : static void
    4672           0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
    4673             :            struct spdk_nvme_qpair *qpair, uint16_t cid)
    4674             : {
    4675           0 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    4676             :         union spdk_nvme_csts_register csts;
    4677             :         int rc;
    4678             : 
    4679           0 :         assert(nvme_ctrlr->ctrlr == ctrlr);
    4680             : 
    4681           0 :         NVME_CTRLR_WARNLOG(nvme_ctrlr, "Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n",
    4682             :                            ctrlr, qpair, cid);
    4683             : 
    4684             :         /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
    4685             :          * queue.  (Note: qpair == NULL when there's an admin cmd timeout.)  Otherwise we
    4686             :          * would submit another fabrics cmd on the admin queue to read CSTS and check for its
    4687             :          * completion recursively.
    4688             :          */
    4689           0 :         if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
    4690           0 :                 csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
    4691           0 :                 if (csts.bits.cfs) {
    4692           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr, "Controller Fatal Status, reset required\n");
    4693           0 :                         bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4694           0 :                         return;
    4695             :                 }
    4696           0 :         }
    4697             : 
    4698           0 :         switch (g_opts.action_on_timeout) {
    4699             :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
    4700           0 :                 if (qpair) {
    4701             :                         /* Don't send abort to ctrlr when ctrlr is not available. */
    4702           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4703           0 :                         if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    4704           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4705           0 :                                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Quit abort. Ctrlr is not available.\n");
    4706           0 :                                 return;
    4707             :                         }
    4708           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4709             : 
    4710           0 :                         rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
    4711           0 :                                                        nvme_abort_cpl, nvme_ctrlr);
    4712           0 :                         if (rc == 0) {
    4713           0 :                                 return;
    4714             :                         }
    4715             : 
    4716           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to send abort. Resetting, rc is %d.\n", rc);
    4717           0 :                 }
    4718             : 
    4719             :         /* FALLTHROUGH */
    4720             :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
    4721           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4722           0 :                 break;
    4723             :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
    4724           0 :                 NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "No action for nvme controller timeout.\n");
    4725           0 :                 break;
    4726             :         default:
    4727           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "An invalid timeout action value is found.\n");
    4728           0 :                 break;
    4729             :         }
    4730           0 : }
    4731             : 
    4732             : static struct nvme_ns *
    4733          51 : nvme_ns_alloc(void)
    4734             : {
    4735             :         struct nvme_ns *nvme_ns;
    4736             : 
    4737          51 :         nvme_ns = calloc(1, sizeof(struct nvme_ns));
    4738          51 :         if (nvme_ns == NULL) {
    4739           0 :                 return NULL;
    4740             :         }
    4741             : 
    4742          51 :         if (g_opts.io_path_stat) {
    4743           0 :                 nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
    4744           0 :                 if (nvme_ns->stat == NULL) {
    4745           0 :                         free(nvme_ns);
    4746           0 :                         return NULL;
    4747             :                 }
    4748           0 :                 spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
    4749           0 :         }
    4750             : 
    4751          51 :         return nvme_ns;
    4752          51 : }
    4753             : 
    4754             : static void
    4755          51 : nvme_ns_free(struct nvme_ns *nvme_ns)
    4756             : {
    4757          51 :         free(nvme_ns->stat);
    4758          51 :         free(nvme_ns);
    4759          51 : }
    4760             : 
    4761             : static void
    4762          51 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
    4763             : {
    4764          51 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4765          51 :         struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
    4766             : 
    4767          51 :         if (rc == 0) {
    4768          49 :                 nvme_ns->probe_ctx = NULL;
    4769          49 :                 pthread_mutex_lock(&nvme_ctrlr->mutex);
    4770          49 :                 nvme_ctrlr->ref++;
    4771          49 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4772          49 :         } else {
    4773           2 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4774           2 :                 nvme_ns_free(nvme_ns);
    4775             :         }
    4776             : 
    4777          51 :         if (ctx) {
    4778          50 :                 ctx->populates_in_progress--;
    4779          50 :                 if (ctx->populates_in_progress == 0) {
    4780          12 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    4781          12 :                 }
    4782          50 :         }
    4783          51 : }
    4784             : 
    4785             : static void
    4786           2 : bdev_nvme_add_io_path(struct nvme_bdev_channel_iter *i,
    4787             :                       struct nvme_bdev *nbdev,
    4788             :                       struct nvme_bdev_channel *nbdev_ch, void *ctx)
    4789             : {
    4790           2 :         struct nvme_ns *nvme_ns = ctx;
    4791             :         int rc;
    4792             : 
    4793           2 :         rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
    4794           2 :         if (rc != 0) {
    4795           0 :                 SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
    4796           0 :         }
    4797             : 
    4798           2 :         nvme_bdev_for_each_channel_continue(i, rc);
    4799           2 : }
    4800             : 
    4801             : static void
    4802           2 : bdev_nvme_delete_io_path(struct nvme_bdev_channel_iter *i,
    4803             :                          struct nvme_bdev *nbdev,
    4804             :                          struct nvme_bdev_channel *nbdev_ch, void *ctx)
    4805             : {
    4806           2 :         struct nvme_ns *nvme_ns = ctx;
    4807             :         struct nvme_io_path *io_path;
    4808             : 
    4809           2 :         io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
    4810           2 :         if (io_path != NULL) {
    4811           2 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
    4812           2 :         }
    4813             : 
    4814           2 :         nvme_bdev_for_each_channel_continue(i, 0);
    4815           2 : }
    4816             : 
    4817             : static void
    4818           0 : bdev_nvme_add_io_path_failed(struct nvme_bdev *nbdev, void *ctx, int status)
    4819             : {
    4820           0 :         struct nvme_ns *nvme_ns = ctx;
    4821             : 
    4822           0 :         nvme_ctrlr_populate_namespace_done(nvme_ns, -1);
    4823           0 : }
    4824             : 
    4825             : static void
    4826          12 : bdev_nvme_add_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
    4827             : {
    4828          12 :         struct nvme_ns *nvme_ns = ctx;
    4829             : 
    4830          12 :         if (status == 0) {
    4831          12 :                 nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
    4832          12 :         } else {
    4833             :                 /* Delete the added io_paths and fail populating the namespace. */
    4834           0 :                 nvme_bdev_for_each_channel(nbdev,
    4835             :                                            bdev_nvme_delete_io_path,
    4836           0 :                                            nvme_ns,
    4837             :                                            bdev_nvme_add_io_path_failed);
    4838             :         }
    4839          12 : }
    4840             : 
    4841             : static int
    4842          13 : nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
    4843             : {
    4844             :         struct nvme_ns *tmp_ns;
    4845             :         const struct spdk_nvme_ns_data *nsdata;
    4846             : 
    4847          13 :         nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
    4848          13 :         if (!nsdata->nmic.can_share) {
    4849           0 :                 SPDK_ERRLOG("Namespace cannot be shared.\n");
    4850           0 :                 return -EINVAL;
    4851             :         }
    4852             : 
    4853          13 :         pthread_mutex_lock(&bdev->mutex);
    4854             : 
    4855          13 :         tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
    4856          13 :         assert(tmp_ns != NULL);
    4857             : 
    4858          13 :         if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
    4859           1 :                 pthread_mutex_unlock(&bdev->mutex);
    4860           1 :                 SPDK_ERRLOG("Namespaces are not identical.\n");
    4861           1 :                 return -EINVAL;
    4862             :         }
    4863             : 
    4864          12 :         bdev->ref++;
    4865          12 :         TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
    4866          12 :         nvme_ns->bdev = bdev;
    4867             : 
    4868          12 :         pthread_mutex_unlock(&bdev->mutex);
    4869             : 
    4870             :         /* Add nvme_io_path to nvme_bdev_channels dynamically. */
    4871          24 :         nvme_bdev_for_each_channel(bdev,
    4872             :                                    bdev_nvme_add_io_path,
    4873          12 :                                    nvme_ns,
    4874             :                                    bdev_nvme_add_io_path_done);
    4875             : 
    4876          12 :         return 0;
    4877          13 : }
    4878             : 
    4879             : static void
    4880          51 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4881             : {
    4882             :         struct spdk_nvme_ns     *ns;
    4883             :         struct nvme_bdev        *bdev;
    4884          51 :         int                     rc = 0;
    4885             : 
    4886          51 :         ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
    4887          51 :         if (!ns) {
    4888           0 :                 NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "Invalid NS %d\n", nvme_ns->id);
    4889           0 :                 rc = -EINVAL;
    4890           0 :                 goto done;
    4891             :         }
    4892             : 
    4893          51 :         nvme_ns->ns = ns;
    4894          51 :         nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    4895             : 
    4896          51 :         if (nvme_ctrlr->ana_log_page != NULL) {
    4897          37 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
    4898          37 :         }
    4899             : 
    4900          51 :         bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
    4901          90 :         if (bdev == NULL) {
    4902          38 :                 rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
    4903          38 :         } else {
    4904          13 :                 rc = nvme_bdev_add_ns(bdev, nvme_ns);
    4905          13 :                 if (rc == 0) {
    4906          12 :                         return;
    4907             :                 }
    4908             :         }
    4909             : done:
    4910          39 :         nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
    4911          51 : }
    4912             : 
    4913             : static void
    4914          49 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
    4915             : {
    4916          49 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4917             : 
    4918          49 :         assert(nvme_ctrlr != NULL);
    4919             : 
    4920          49 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4921             : 
    4922          49 :         RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4923             : 
    4924          49 :         if (nvme_ns->bdev != NULL) {
    4925           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4926           0 :                 return;
    4927             :         }
    4928             : 
    4929          49 :         nvme_ns_free(nvme_ns);
    4930          49 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4931             : 
    4932          49 :         nvme_ctrlr_release(nvme_ctrlr);
    4933          49 : }
    4934             : 
    4935             : static void
    4936          11 : bdev_nvme_delete_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
    4937             : {
    4938          11 :         struct nvme_ns *nvme_ns = ctx;
    4939             : 
    4940          11 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    4941          11 : }
    4942             : 
    4943             : static void
    4944          49 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4945             : {
    4946             :         struct nvme_bdev *bdev;
    4947             : 
    4948          49 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    4949             : 
    4950          49 :         bdev = nvme_ns->bdev;
    4951          49 :         if (bdev != NULL) {
    4952          45 :                 pthread_mutex_lock(&bdev->mutex);
    4953             : 
    4954          45 :                 assert(bdev->ref > 0);
    4955          45 :                 bdev->ref--;
    4956          45 :                 if (bdev->ref == 0) {
    4957          34 :                         pthread_mutex_unlock(&bdev->mutex);
    4958             : 
    4959          34 :                         spdk_bdev_unregister(&bdev->disk, NULL, NULL);
    4960          34 :                 } else {
    4961             :                         /* spdk_bdev_unregister() is not called until the last nvme_ns is
    4962             :                          * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
    4963             :                          * and clear nvme_ns->bdev here.
    4964             :                          */
    4965          11 :                         TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
    4966          11 :                         nvme_ns->bdev = NULL;
    4967             : 
    4968          11 :                         pthread_mutex_unlock(&bdev->mutex);
    4969             : 
    4970             :                         /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
    4971             :                          * we call depopulate_namespace_done() to avoid use-after-free.
    4972             :                          */
    4973          22 :                         nvme_bdev_for_each_channel(bdev,
    4974             :                                                    bdev_nvme_delete_io_path,
    4975          11 :                                                    nvme_ns,
    4976             :                                                    bdev_nvme_delete_io_path_done);
    4977          11 :                         return;
    4978             :                 }
    4979          34 :         }
    4980             : 
    4981          38 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    4982          49 : }
    4983             : 
    4984             : static void
    4985          62 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
    4986             :                                struct nvme_async_probe_ctx *ctx)
    4987             : {
    4988          62 :         struct spdk_nvme_ctrlr  *ctrlr = nvme_ctrlr->ctrlr;
    4989             :         struct nvme_ns  *nvme_ns, *next;
    4990             :         struct spdk_nvme_ns     *ns;
    4991             :         struct nvme_bdev        *bdev;
    4992             :         uint32_t                nsid;
    4993             :         int                     rc;
    4994             :         uint64_t                num_sectors;
    4995             : 
    4996          62 :         if (ctx) {
    4997             :                 /* Initialize this count to 1 to handle the populate functions
    4998             :                  * calling nvme_ctrlr_populate_namespace_done() immediately.
    4999             :                  */
    5000          46 :                 ctx->populates_in_progress = 1;
    5001          46 :         }
    5002             : 
    5003             :         /* First loop over our existing namespaces and see if they have been
    5004             :          * removed. */
    5005          62 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    5006          66 :         while (nvme_ns != NULL) {
    5007           4 :                 next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    5008             : 
    5009           4 :                 if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    5010             :                         /* NS is still there or added again. Its attributes may have changed. */
    5011           3 :                         ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
    5012           3 :                         if (nvme_ns->ns != ns) {
    5013           1 :                                 assert(nvme_ns->ns == NULL);
    5014           1 :                                 nvme_ns->ns = ns;
    5015           1 :                                 NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "NSID %u was added\n", nvme_ns->id);
    5016           1 :                         }
    5017             : 
    5018           3 :                         num_sectors = spdk_nvme_ns_get_num_sectors(ns);
    5019           3 :                         bdev = nvme_ns->bdev;
    5020           3 :                         assert(bdev != NULL);
    5021           3 :                         if (bdev->disk.blockcnt != num_sectors) {
    5022           1 :                                 NVME_CTRLR_NOTICELOG(nvme_ctrlr,
    5023             :                                                      "NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
    5024             :                                                      nvme_ns->id,
    5025             :                                                      bdev->disk.name,
    5026             :                                                      bdev->disk.blockcnt,
    5027             :                                                      num_sectors);
    5028           1 :                                 rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
    5029           1 :                                 if (rc != 0) {
    5030           0 :                                         NVME_CTRLR_ERRLOG(nvme_ctrlr,
    5031             :                                                           "Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
    5032             :                                                           bdev->disk.name, rc);
    5033           0 :                                 }
    5034           1 :                         }
    5035           3 :                 } else {
    5036             :                         /* Namespace was removed */
    5037           1 :                         nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    5038             :                 }
    5039             : 
    5040           4 :                 nvme_ns = next;
    5041             :         }
    5042             : 
    5043             :         /* Loop through all of the namespaces at the nvme level and see if any of them are new */
    5044          62 :         nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    5045         116 :         while (nsid != 0) {
    5046          54 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    5047             : 
    5048          54 :                 if (nvme_ns == NULL) {
    5049             :                         /* Found a new one */
    5050          51 :                         nvme_ns = nvme_ns_alloc();
    5051          51 :                         if (nvme_ns == NULL) {
    5052           0 :                                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate namespace\n");
    5053             :                                 /* This just fails to attach the namespace. It may work on a future attempt. */
    5054           0 :                                 continue;
    5055             :                         }
    5056             : 
    5057          51 :                         nvme_ns->id = nsid;
    5058          51 :                         nvme_ns->ctrlr = nvme_ctrlr;
    5059             : 
    5060          51 :                         nvme_ns->bdev = NULL;
    5061             : 
    5062          51 :                         if (ctx) {
    5063          50 :                                 ctx->populates_in_progress++;
    5064          50 :                         }
    5065          51 :                         nvme_ns->probe_ctx = ctx;
    5066             : 
    5067          51 :                         RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    5068             : 
    5069          51 :                         nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
    5070          51 :                 }
    5071             : 
    5072          54 :                 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
    5073             :         }
    5074             : 
    5075          62 :         if (ctx) {
    5076             :                 /* Decrement this count now that the loop is over to account
    5077             :                  * for the one we started with.  If the count is then 0, we
    5078             :                  * know any populate_namespace functions completed immediately,
    5079             :                  * so we'll kick the callback here.
    5080             :                  */
    5081          46 :                 ctx->populates_in_progress--;
    5082          46 :                 if (ctx->populates_in_progress == 0) {
    5083          34 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    5084          34 :                 }
    5085          46 :         }
    5086             : 
    5087          62 : }
    5088             : 
    5089             : static void
    5090          61 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    5091             : {
    5092             :         struct nvme_ns *nvme_ns, *tmp;
    5093             : 
    5094         109 :         RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
    5095          48 :                 nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    5096          48 :         }
    5097          61 : }
    5098             : 
    5099             : static uint32_t
    5100          36 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
    5101             : {
    5102          36 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5103             :         const struct spdk_nvme_ctrlr_data *cdata;
    5104          36 :         uint32_t nsid, ns_count = 0;
    5105             : 
    5106          36 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5107             : 
    5108          80 :         for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    5109          80 :              nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
    5110          44 :                 ns_count++;
    5111          44 :         }
    5112             : 
    5113          72 :         return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    5114          36 :                sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
    5115             :                sizeof(uint32_t);
    5116             : }
    5117             : 
    5118             : static int
    5119           7 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
    5120             :                           void *cb_arg)
    5121             : {
    5122           7 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    5123             :         struct nvme_ns *nvme_ns;
    5124             :         uint32_t i, nsid;
    5125             : 
    5126          13 :         for (i = 0; i < desc->num_of_nsid; i++) {
    5127           6 :                 nsid = desc->nsid[i];
    5128           6 :                 if (nsid == 0) {
    5129           0 :                         continue;
    5130             :                 }
    5131             : 
    5132           6 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    5133             : 
    5134           6 :                 if (nvme_ns == NULL) {
    5135             :                         /* Target told us that an inactive namespace had an ANA change */
    5136           1 :                         continue;
    5137             :                 }
    5138             : 
    5139           5 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    5140           5 :         }
    5141             : 
    5142           7 :         return 0;
    5143             : }
    5144             : 
    5145             : static void
    5146           0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    5147             : {
    5148             :         struct nvme_ns *nvme_ns;
    5149             : 
    5150           0 :         spdk_free(nvme_ctrlr->ana_log_page);
    5151           0 :         nvme_ctrlr->ana_log_page = NULL;
    5152             : 
    5153           0 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    5154           0 :              nvme_ns != NULL;
    5155           0 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    5156           0 :                 nvme_ns->ana_state_updating = false;
    5157           0 :                 nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    5158           0 :         }
    5159           0 : }
    5160             : 
    5161             : static void
    5162           3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
    5163             : {
    5164           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    5165             : 
    5166           3 :         if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
    5167           6 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
    5168           3 :                                              nvme_ctrlr);
    5169           3 :         } else {
    5170           0 :                 bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
    5171             :         }
    5172             : 
    5173           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5174             : 
    5175           3 :         assert(nvme_ctrlr->ana_log_page_updating == true);
    5176           3 :         nvme_ctrlr->ana_log_page_updating = false;
    5177             : 
    5178           3 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    5179           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5180             : 
    5181           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    5182           0 :         } else {
    5183           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5184             : 
    5185           3 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    5186             :         }
    5187           3 : }
    5188             : 
    5189             : static int
    5190           6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    5191             : {
    5192             :         uint32_t ana_log_page_size;
    5193             :         int rc;
    5194             : 
    5195           6 :         if (nvme_ctrlr->ana_log_page == NULL) {
    5196           0 :                 return -EINVAL;
    5197             :         }
    5198             : 
    5199           6 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    5200             : 
    5201           6 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    5202           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr,
    5203             :                                   "ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    5204             :                                   ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    5205           0 :                 return -EINVAL;
    5206             :         }
    5207             : 
    5208           6 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5209           6 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    5210           5 :             nvme_ctrlr->ana_log_page_updating) {
    5211           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5212           3 :                 return -EBUSY;
    5213             :         }
    5214             : 
    5215           3 :         nvme_ctrlr->ana_log_page_updating = true;
    5216           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5217             : 
    5218           6 :         rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
    5219             :                                               SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    5220             :                                               SPDK_NVME_GLOBAL_NS_TAG,
    5221           3 :                                               nvme_ctrlr->ana_log_page,
    5222           3 :                                               ana_log_page_size, 0,
    5223             :                                               nvme_ctrlr_read_ana_log_page_done,
    5224           3 :                                               nvme_ctrlr);
    5225           3 :         if (rc != 0) {
    5226           0 :                 nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
    5227           0 :         }
    5228             : 
    5229           3 :         return rc;
    5230           6 : }
    5231             : 
    5232             : static void
    5233           0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
    5234             : {
    5235           0 : }
    5236             : 
    5237             : struct bdev_nvme_set_preferred_path_ctx {
    5238             :         struct spdk_bdev_desc *desc;
    5239             :         struct nvme_ns *nvme_ns;
    5240             :         bdev_nvme_set_preferred_path_cb cb_fn;
    5241             :         void *cb_arg;
    5242             : };
    5243             : 
    5244             : static void
    5245           3 : bdev_nvme_set_preferred_path_done(struct nvme_bdev *nbdev, void *_ctx, int status)
    5246             : {
    5247           3 :         struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
    5248             : 
    5249           3 :         assert(ctx != NULL);
    5250           3 :         assert(ctx->desc != NULL);
    5251           3 :         assert(ctx->cb_fn != NULL);
    5252             : 
    5253           3 :         spdk_bdev_close(ctx->desc);
    5254             : 
    5255           3 :         ctx->cb_fn(ctx->cb_arg, status);
    5256             : 
    5257           3 :         free(ctx);
    5258           3 : }
    5259             : 
    5260             : static void
    5261           2 : _bdev_nvme_set_preferred_path(struct nvme_bdev_channel_iter *i,
    5262             :                               struct nvme_bdev *nbdev,
    5263             :                               struct nvme_bdev_channel *nbdev_ch, void *_ctx)
    5264             : {
    5265           2 :         struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
    5266             :         struct nvme_io_path *io_path, *prev;
    5267             : 
    5268           2 :         prev = NULL;
    5269           3 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    5270           3 :                 if (io_path->nvme_ns == ctx->nvme_ns) {
    5271           2 :                         break;
    5272             :                 }
    5273           1 :                 prev = io_path;
    5274           1 :         }
    5275             : 
    5276           2 :         if (io_path != NULL) {
    5277           2 :                 if (prev != NULL) {
    5278           1 :                         STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
    5279           1 :                         STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
    5280           1 :                 }
    5281             : 
    5282             :                 /* We can set io_path to nbdev_ch->current_io_path directly here.
    5283             :                  * However, it needs to be conditional. To simplify the code,
    5284             :                  * just clear nbdev_ch->current_io_path and let find_io_path()
    5285             :                  * fill it.
    5286             :                  *
    5287             :                  * Automatic failback may be disabled. Hence even if the io_path is
    5288             :                  * already at the head, clear nbdev_ch->current_io_path.
    5289             :                  */
    5290           2 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    5291           2 :         }
    5292             : 
    5293           2 :         nvme_bdev_for_each_channel_continue(i, 0);
    5294           2 : }
    5295             : 
    5296             : static struct nvme_ns *
    5297           3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
    5298             : {
    5299             :         struct nvme_ns *nvme_ns, *prev;
    5300             :         const struct spdk_nvme_ctrlr_data *cdata;
    5301             : 
    5302           3 :         prev = NULL;
    5303           6 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    5304           6 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    5305             : 
    5306           6 :                 if (cdata->cntlid == cntlid) {
    5307           3 :                         break;
    5308             :                 }
    5309           3 :                 prev = nvme_ns;
    5310           3 :         }
    5311             : 
    5312           3 :         if (nvme_ns != NULL && prev != NULL) {
    5313           2 :                 TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
    5314           2 :                 TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
    5315           2 :         }
    5316             : 
    5317           3 :         return nvme_ns;
    5318             : }
    5319             : 
    5320             : /* This function supports only multipath mode. There is only a single I/O path
    5321             :  * for each NVMe-oF controller. Hence, just move the matched I/O path to the
    5322             :  * head of the I/O path list for each NVMe bdev channel.
    5323             :  *
    5324             :  * NVMe bdev channel may be acquired after completing this function. move the
    5325             :  * matched namespace to the head of the namespace list for the NVMe bdev too.
    5326             :  */
    5327             : void
    5328           3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
    5329             :                              bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
    5330             : {
    5331             :         struct bdev_nvme_set_preferred_path_ctx *ctx;
    5332             :         struct spdk_bdev *bdev;
    5333             :         struct nvme_bdev *nbdev;
    5334           3 :         int rc = 0;
    5335             : 
    5336           3 :         assert(cb_fn != NULL);
    5337             : 
    5338           3 :         ctx = calloc(1, sizeof(*ctx));
    5339           3 :         if (ctx == NULL) {
    5340           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    5341           0 :                 rc = -ENOMEM;
    5342           0 :                 goto err_alloc;
    5343             :         }
    5344             : 
    5345           3 :         ctx->cb_fn = cb_fn;
    5346           3 :         ctx->cb_arg = cb_arg;
    5347             : 
    5348           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    5349           3 :         if (rc != 0) {
    5350           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    5351           0 :                 goto err_open;
    5352             :         }
    5353             : 
    5354           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    5355             : 
    5356           3 :         if (bdev->module != &nvme_if) {
    5357           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    5358           0 :                 rc = -ENODEV;
    5359           0 :                 goto err_bdev;
    5360             :         }
    5361             : 
    5362           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    5363             : 
    5364           3 :         pthread_mutex_lock(&nbdev->mutex);
    5365             : 
    5366           3 :         ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
    5367           3 :         if (ctx->nvme_ns == NULL) {
    5368           0 :                 pthread_mutex_unlock(&nbdev->mutex);
    5369             : 
    5370           0 :                 SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
    5371           0 :                 rc = -ENODEV;
    5372           0 :                 goto err_bdev;
    5373             :         }
    5374             : 
    5375           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5376             : 
    5377           6 :         nvme_bdev_for_each_channel(nbdev,
    5378             :                                    _bdev_nvme_set_preferred_path,
    5379           3 :                                    ctx,
    5380             :                                    bdev_nvme_set_preferred_path_done);
    5381           3 :         return;
    5382             : 
    5383             : err_bdev:
    5384           0 :         spdk_bdev_close(ctx->desc);
    5385             : err_open:
    5386           0 :         free(ctx);
    5387             : err_alloc:
    5388           0 :         cb_fn(cb_arg, rc);
    5389           3 : }
    5390             : 
    5391             : struct bdev_nvme_set_multipath_policy_ctx {
    5392             :         struct spdk_bdev_desc *desc;
    5393             :         spdk_bdev_nvme_set_multipath_policy_cb cb_fn;
    5394             :         void *cb_arg;
    5395             : };
    5396             : 
    5397             : static void
    5398           3 : bdev_nvme_set_multipath_policy_done(struct nvme_bdev *nbdev, void *_ctx, int status)
    5399             : {
    5400           3 :         struct bdev_nvme_set_multipath_policy_ctx *ctx = _ctx;
    5401             : 
    5402           3 :         assert(ctx != NULL);
    5403           3 :         assert(ctx->desc != NULL);
    5404           3 :         assert(ctx->cb_fn != NULL);
    5405             : 
    5406           3 :         spdk_bdev_close(ctx->desc);
    5407             : 
    5408           3 :         ctx->cb_fn(ctx->cb_arg, status);
    5409             : 
    5410           3 :         free(ctx);
    5411           3 : }
    5412             : 
    5413             : static void
    5414           1 : _bdev_nvme_set_multipath_policy(struct nvme_bdev_channel_iter *i,
    5415             :                                 struct nvme_bdev *nbdev,
    5416             :                                 struct nvme_bdev_channel *nbdev_ch, void *ctx)
    5417             : {
    5418           1 :         nbdev_ch->mp_policy = nbdev->mp_policy;
    5419           1 :         nbdev_ch->mp_selector = nbdev->mp_selector;
    5420           1 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
    5421           1 :         bdev_nvme_clear_current_io_path(nbdev_ch);
    5422             : 
    5423           1 :         nvme_bdev_for_each_channel_continue(i, 0);
    5424           1 : }
    5425             : 
    5426             : void
    5427           3 : spdk_bdev_nvme_set_multipath_policy(const char *name, enum spdk_bdev_nvme_multipath_policy policy,
    5428             :                                     enum spdk_bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
    5429             :                                     spdk_bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
    5430             : {
    5431             :         struct bdev_nvme_set_multipath_policy_ctx *ctx;
    5432             :         struct spdk_bdev *bdev;
    5433             :         struct nvme_bdev *nbdev;
    5434             :         int rc;
    5435             : 
    5436           3 :         assert(cb_fn != NULL);
    5437             : 
    5438           3 :         switch (policy) {
    5439             :         case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
    5440           1 :                 break;
    5441             :         case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
    5442           2 :                 switch (selector) {
    5443             :                 case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
    5444           1 :                         if (rr_min_io == UINT32_MAX) {
    5445           0 :                                 rr_min_io = 1;
    5446           1 :                         } else if (rr_min_io == 0) {
    5447           0 :                                 rc = -EINVAL;
    5448           0 :                                 goto exit;
    5449             :                         }
    5450           1 :                         break;
    5451             :                 case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
    5452           1 :                         break;
    5453             :                 default:
    5454           0 :                         rc = -EINVAL;
    5455           0 :                         goto exit;
    5456             :                 }
    5457           2 :                 break;
    5458             :         default:
    5459           0 :                 rc = -EINVAL;
    5460           0 :                 goto exit;
    5461             :         }
    5462             : 
    5463           3 :         ctx = calloc(1, sizeof(*ctx));
    5464           3 :         if (ctx == NULL) {
    5465           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    5466           0 :                 rc = -ENOMEM;
    5467           0 :                 goto exit;
    5468             :         }
    5469             : 
    5470           3 :         ctx->cb_fn = cb_fn;
    5471           3 :         ctx->cb_arg = cb_arg;
    5472             : 
    5473           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    5474           3 :         if (rc != 0) {
    5475           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    5476           0 :                 rc = -ENODEV;
    5477           0 :                 goto err_open;
    5478             :         }
    5479             : 
    5480           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    5481           3 :         if (bdev->module != &nvme_if) {
    5482           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    5483           0 :                 rc = -ENODEV;
    5484           0 :                 goto err_module;
    5485             :         }
    5486           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    5487             : 
    5488           3 :         pthread_mutex_lock(&nbdev->mutex);
    5489           3 :         nbdev->mp_policy = policy;
    5490           3 :         nbdev->mp_selector = selector;
    5491           3 :         nbdev->rr_min_io = rr_min_io;
    5492           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5493             : 
    5494           6 :         nvme_bdev_for_each_channel(nbdev,
    5495             :                                    _bdev_nvme_set_multipath_policy,
    5496           3 :                                    ctx,
    5497             :                                    bdev_nvme_set_multipath_policy_done);
    5498           3 :         return;
    5499             : 
    5500             : err_module:
    5501           0 :         spdk_bdev_close(ctx->desc);
    5502             : err_open:
    5503           0 :         free(ctx);
    5504             : exit:
    5505           0 :         cb_fn(cb_arg, rc);
    5506           3 : }
    5507             : 
    5508             : static void
    5509           3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    5510             : {
    5511           3 :         struct nvme_ctrlr *nvme_ctrlr           = arg;
    5512             :         union spdk_nvme_async_event_completion  event;
    5513             : 
    5514           3 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5515           0 :                 SPDK_WARNLOG("AER request execute failed\n");
    5516           0 :                 return;
    5517             :         }
    5518             : 
    5519           3 :         event.raw = cpl->cdw0;
    5520           3 :         if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5521           3 :             (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
    5522           2 :                 nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
    5523           3 :         } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5524           1 :                    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
    5525           1 :                 nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
    5526           1 :         }
    5527           3 : }
    5528             : 
    5529             : static void
    5530          52 : free_nvme_async_probe_ctx(struct nvme_async_probe_ctx *ctx)
    5531             : {
    5532          52 :         spdk_keyring_put_key(ctx->drv_opts.tls_psk);
    5533          52 :         spdk_keyring_put_key(ctx->drv_opts.dhchap_key);
    5534          52 :         spdk_keyring_put_key(ctx->drv_opts.dhchap_ctrlr_key);
    5535          52 :         free(ctx);
    5536          52 : }
    5537             : 
    5538             : static void
    5539          52 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
    5540             : {
    5541          52 :         if (ctx->cb_fn) {
    5542          52 :                 ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
    5543          52 :         }
    5544             : 
    5545          52 :         ctx->namespaces_populated = true;
    5546          52 :         if (ctx->probe_done) {
    5547             :                 /* The probe was already completed, so we need to free the context
    5548             :                  * here.  This can happen for cases like OCSSD, where we need to
    5549             :                  * send additional commands to the SSD after attach.
    5550             :                  */
    5551          31 :                 free_nvme_async_probe_ctx(ctx);
    5552          31 :         }
    5553          52 : }
    5554             : 
    5555             : static int
    5556          20 : bdev_nvme_remove_poller(void *ctx)
    5557             : {
    5558             :         struct spdk_nvme_transport_id trid_pcie;
    5559             : 
    5560          20 :         if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    5561           1 :                 spdk_poller_unregister(&g_hotplug_poller);
    5562           1 :                 return SPDK_POLLER_IDLE;
    5563             :         }
    5564             : 
    5565          19 :         memset(&trid_pcie, 0, sizeof(trid_pcie));
    5566          19 :         spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
    5567             : 
    5568          19 :         if (spdk_nvme_scan_attached(&trid_pcie)) {
    5569           0 :                 SPDK_ERRLOG_RATELIMIT("spdk_nvme_scan_attached() failed\n");
    5570           0 :         }
    5571             : 
    5572          19 :         return SPDK_POLLER_BUSY;
    5573          20 : }
    5574             : 
    5575             : static void
    5576          60 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
    5577             :                        struct nvme_async_probe_ctx *ctx)
    5578             : {
    5579          60 :         struct spdk_nvme_transport_id *trid = &nvme_ctrlr->active_path_id->trid;
    5580             : 
    5581          60 :         if (spdk_nvme_trtype_is_fabrics(trid->trtype)) {
    5582          60 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was created to %s:%s\n",
    5583             :                                    trid->traddr, trid->trsvcid);
    5584          60 :         } else {
    5585           0 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was created\n");
    5586             :         }
    5587             : 
    5588         120 :         spdk_io_device_register(nvme_ctrlr,
    5589             :                                 bdev_nvme_create_ctrlr_channel_cb,
    5590             :                                 bdev_nvme_destroy_ctrlr_channel_cb,
    5591             :                                 sizeof(struct nvme_ctrlr_channel),
    5592          60 :                                 nvme_ctrlr->nbdev_ctrlr->name);
    5593             : 
    5594          60 :         nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
    5595             : 
    5596          60 :         if (g_hotplug_poller == NULL) {
    5597           2 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
    5598             :                                                         NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
    5599           2 :         }
    5600          60 : }
    5601             : 
    5602             : static void
    5603          30 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
    5604             : {
    5605          30 :         struct nvme_ctrlr *nvme_ctrlr = _ctx;
    5606          30 :         struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
    5607             : 
    5608          30 :         nvme_ctrlr->probe_ctx = NULL;
    5609             : 
    5610          30 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5611           0 :                 nvme_ctrlr_delete(nvme_ctrlr);
    5612             : 
    5613           0 :                 if (ctx != NULL) {
    5614           0 :                         ctx->reported_bdevs = 0;
    5615           0 :                         populate_namespaces_cb(ctx, -1);
    5616           0 :                 }
    5617           0 :                 return;
    5618             :         }
    5619             : 
    5620          30 :         nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    5621          30 : }
    5622             : 
    5623             : static int
    5624          30 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    5625             :                              struct nvme_async_probe_ctx *ctx)
    5626             : {
    5627          30 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5628             :         const struct spdk_nvme_ctrlr_data *cdata;
    5629             :         uint32_t ana_log_page_size;
    5630             : 
    5631          30 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5632             : 
    5633             :         /* Set buffer size enough to include maximum number of allowed namespaces. */
    5634          60 :         ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    5635          30 :                             sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
    5636             :                             sizeof(uint32_t);
    5637             : 
    5638          30 :         nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
    5639             :                                                 SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5640          30 :         if (nvme_ctrlr->ana_log_page == NULL) {
    5641           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "could not allocate ANA log page buffer\n");
    5642           0 :                 return -ENXIO;
    5643             :         }
    5644             : 
    5645             :         /* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned.
    5646             :          * Hence copy each descriptor to a temporary area when parsing it.
    5647             :          *
    5648             :          * Allocate a buffer whose size is as large as ANA log page buffer because
    5649             :          * we do not know the size of a descriptor until actually reading it.
    5650             :          */
    5651          30 :         nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
    5652          30 :         if (nvme_ctrlr->copied_ana_desc == NULL) {
    5653           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "could not allocate a buffer to parse ANA descriptor\n");
    5654           0 :                 return -ENOMEM;
    5655             :         }
    5656             : 
    5657          30 :         nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
    5658             : 
    5659          30 :         nvme_ctrlr->probe_ctx = ctx;
    5660             : 
    5661             :         /* Then, set the read size only to include the current active namespaces. */
    5662          30 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    5663             : 
    5664          30 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    5665           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    5666             :                                   ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    5667           0 :                 return -EINVAL;
    5668             :         }
    5669             : 
    5670          60 :         return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
    5671             :                                                 SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    5672             :                                                 SPDK_NVME_GLOBAL_NS_TAG,
    5673          30 :                                                 nvme_ctrlr->ana_log_page,
    5674          30 :                                                 ana_log_page_size, 0,
    5675             :                                                 nvme_ctrlr_init_ana_log_page_done,
    5676          30 :                                                 nvme_ctrlr);
    5677          30 : }
    5678             : 
    5679             : /* hostnqn and subnqn were already verified before attaching a controller.
    5680             :  * Hence check only the multipath capability and cntlid here.
    5681             :  */
    5682             : static bool
    5683          16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
    5684             : {
    5685             :         struct nvme_ctrlr *tmp;
    5686             :         const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
    5687             : 
    5688          16 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5689             : 
    5690          16 :         if (!cdata->cmic.multi_ctrlr) {
    5691           0 :                 SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5692           0 :                 return false;
    5693             :         }
    5694             : 
    5695          33 :         TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
    5696          18 :                 tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
    5697             : 
    5698          18 :                 if (!tmp_cdata->cmic.multi_ctrlr) {
    5699           0 :                         NVME_CTRLR_ERRLOG(tmp, "Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5700           0 :                         return false;
    5701             :                 }
    5702          18 :                 if (cdata->cntlid == tmp_cdata->cntlid) {
    5703           1 :                         NVME_CTRLR_ERRLOG(tmp, "cntlid %u are duplicated.\n", tmp_cdata->cntlid);
    5704           1 :                         return false;
    5705             :                 }
    5706          17 :         }
    5707             : 
    5708          15 :         return true;
    5709          16 : }
    5710             : 
    5711             : 
    5712             : static int
    5713          61 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
    5714             : {
    5715             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    5716          61 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5717             :         struct nvme_ctrlr      *nctrlr;
    5718          61 :         int rc = 0;
    5719             : 
    5720          61 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    5721             : 
    5722          61 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    5723          61 :         if (nbdev_ctrlr != NULL) {
    5724          16 :                 if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
    5725           1 :                         rc = -EINVAL;
    5726           1 :                         goto exit;
    5727             :                 }
    5728          32 :                 TAILQ_FOREACH(nctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    5729          17 :                         if (nctrlr->opts.multipath != nvme_ctrlr->opts.multipath) {
    5730             :                                 /* All controllers with the same name must be configured the same
    5731             :                                  * way, either for multipath or failover. If the configuration doesn't
    5732             :                                  * match - report error.
    5733             :                                  */
    5734           0 :                                 rc = -EINVAL;
    5735           0 :                                 goto exit;
    5736             :                         }
    5737          17 :                 }
    5738          15 :         } else {
    5739          45 :                 nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
    5740          45 :                 if (nbdev_ctrlr == NULL) {
    5741           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate nvme_bdev_ctrlr.\n");
    5742           0 :                         rc = -ENOMEM;
    5743           0 :                         goto exit;
    5744             :                 }
    5745          45 :                 nbdev_ctrlr->name = strdup(name);
    5746          45 :                 if (nbdev_ctrlr->name == NULL) {
    5747           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate name of nvme_bdev_ctrlr.\n");
    5748           0 :                         free(nbdev_ctrlr);
    5749           0 :                         goto exit;
    5750             :                 }
    5751          45 :                 TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
    5752          45 :                 TAILQ_INIT(&nbdev_ctrlr->bdevs);
    5753          45 :                 TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
    5754             :         }
    5755          60 :         nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
    5756          60 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
    5757             : exit:
    5758          61 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    5759          61 :         return rc;
    5760             : }
    5761             : 
    5762             : static int
    5763          61 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
    5764             :                   const char *name,
    5765             :                   const struct spdk_nvme_transport_id *trid,
    5766             :                   struct nvme_async_probe_ctx *ctx)
    5767             : {
    5768             :         struct nvme_ctrlr *nvme_ctrlr;
    5769             :         struct nvme_path_id *path_id;
    5770             :         const struct spdk_nvme_ctrlr_data *cdata;
    5771             :         int rc;
    5772             : 
    5773          61 :         nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
    5774          61 :         if (nvme_ctrlr == NULL) {
    5775           0 :                 SPDK_ERRLOG("Failed to allocate device struct\n");
    5776           0 :                 return -ENOMEM;
    5777             :         }
    5778             : 
    5779          61 :         rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
    5780          61 :         if (rc != 0) {
    5781           0 :                 free(nvme_ctrlr);
    5782           0 :                 return rc;
    5783             :         }
    5784             : 
    5785          61 :         TAILQ_INIT(&nvme_ctrlr->trids);
    5786          61 :         RB_INIT(&nvme_ctrlr->namespaces);
    5787             : 
    5788             :         /* Get another reference to the key, so the first one can be released from probe_ctx */
    5789          61 :         if (ctx != NULL) {
    5790          47 :                 if (ctx->drv_opts.tls_psk != NULL) {
    5791           0 :                         nvme_ctrlr->psk = spdk_keyring_get_key(
    5792           0 :                                                   spdk_key_get_name(ctx->drv_opts.tls_psk));
    5793           0 :                         if (nvme_ctrlr->psk == NULL) {
    5794             :                                 /* Could only happen if the key was removed in the meantime */
    5795           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5796             :                                             spdk_key_get_name(ctx->drv_opts.tls_psk));
    5797           0 :                                 rc = -ENOKEY;
    5798           0 :                                 goto err;
    5799             :                         }
    5800           0 :                 }
    5801             : 
    5802          47 :                 if (ctx->drv_opts.dhchap_key != NULL) {
    5803           0 :                         nvme_ctrlr->dhchap_key = spdk_keyring_get_key(
    5804           0 :                                                          spdk_key_get_name(ctx->drv_opts.dhchap_key));
    5805           0 :                         if (nvme_ctrlr->dhchap_key == NULL) {
    5806           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5807             :                                             spdk_key_get_name(ctx->drv_opts.dhchap_key));
    5808           0 :                                 rc = -ENOKEY;
    5809           0 :                                 goto err;
    5810             :                         }
    5811           0 :                 }
    5812             : 
    5813          47 :                 if (ctx->drv_opts.dhchap_ctrlr_key != NULL) {
    5814           0 :                         nvme_ctrlr->dhchap_ctrlr_key =
    5815           0 :                                 spdk_keyring_get_key(
    5816           0 :                                         spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
    5817           0 :                         if (nvme_ctrlr->dhchap_ctrlr_key == NULL) {
    5818           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5819             :                                             spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
    5820           0 :                                 rc = -ENOKEY;
    5821           0 :                                 goto err;
    5822             :                         }
    5823           0 :                 }
    5824          47 :         }
    5825             : 
    5826          61 :         path_id = calloc(1, sizeof(*path_id));
    5827          61 :         if (path_id == NULL) {
    5828           0 :                 SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
    5829           0 :                 rc = -ENOMEM;
    5830           0 :                 goto err;
    5831             :         }
    5832             : 
    5833          61 :         path_id->trid = *trid;
    5834          61 :         if (ctx != NULL) {
    5835          47 :                 memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
    5836          47 :                 memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
    5837          47 :         }
    5838          61 :         nvme_ctrlr->active_path_id = path_id;
    5839          61 :         TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
    5840             : 
    5841          61 :         nvme_ctrlr->thread = spdk_get_thread();
    5842          61 :         nvme_ctrlr->ctrlr = ctrlr;
    5843          61 :         nvme_ctrlr->ref = 1;
    5844             : 
    5845          61 :         if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
    5846           0 :                 SPDK_ERRLOG("OCSSDs are not supported");
    5847           0 :                 rc = -ENOTSUP;
    5848           0 :                 goto err;
    5849             :         }
    5850             : 
    5851          61 :         if (ctx != NULL) {
    5852          47 :                 memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
    5853          47 :         } else {
    5854          14 :                 spdk_bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
    5855             :         }
    5856             : 
    5857          61 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
    5858             :                                           g_opts.nvme_adminq_poll_period_us);
    5859             : 
    5860          61 :         if (g_opts.timeout_us > 0) {
    5861             :                 /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
    5862             :                 /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
    5863           0 :                 uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
    5864           0 :                                           g_opts.timeout_us : g_opts.timeout_admin_us;
    5865           0 :                 spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
    5866           0 :                                 adm_timeout_us, timeout_cb, nvme_ctrlr);
    5867           0 :         }
    5868             : 
    5869          61 :         spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
    5870          61 :         spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
    5871             : 
    5872          61 :         if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    5873             :             SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
    5874           0 :                 nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
    5875           0 :         }
    5876             : 
    5877          61 :         rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
    5878          61 :         if (rc != 0) {
    5879           1 :                 goto err;
    5880             :         }
    5881             : 
    5882          60 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5883             : 
    5884          60 :         if (cdata->cmic.ana_reporting) {
    5885          30 :                 rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
    5886          30 :                 if (rc == 0) {
    5887          30 :                         return 0;
    5888             :                 }
    5889           0 :         } else {
    5890          30 :                 nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    5891          30 :                 return 0;
    5892             :         }
    5893             : 
    5894             : err:
    5895           1 :         nvme_ctrlr_delete(nvme_ctrlr);
    5896           1 :         return rc;
    5897          61 : }
    5898             : 
    5899             : void
    5900          33 : spdk_bdev_nvme_get_default_ctrlr_opts(struct spdk_bdev_nvme_ctrlr_opts *opts)
    5901             : {
    5902          33 :         opts->prchk_flags = 0;
    5903          33 :         opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
    5904          33 :         opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
    5905          33 :         opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
    5906          33 :         opts->multipath = true;
    5907          33 : }
    5908             : 
    5909             : static void
    5910           0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    5911             :           struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
    5912             : {
    5913             :         char *name;
    5914             : 
    5915           0 :         name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
    5916           0 :         if (!name) {
    5917           0 :                 SPDK_ERRLOG("Failed to assign name to NVMe device\n");
    5918           0 :                 return;
    5919             :         }
    5920             : 
    5921           0 :         if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
    5922           0 :                 SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
    5923           0 :         } else {
    5924           0 :                 SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
    5925             :         }
    5926             : 
    5927           0 :         free(name);
    5928           0 : }
    5929             : 
    5930             : static void
    5931          60 : _nvme_ctrlr_destruct(void *ctx)
    5932             : {
    5933          60 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    5934             : 
    5935          60 :         nvme_ctrlr_depopulate_namespaces(nvme_ctrlr);
    5936          60 :         nvme_ctrlr_release(nvme_ctrlr);
    5937          60 : }
    5938             : 
    5939             : static int
    5940          57 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    5941             : {
    5942             :         struct nvme_probe_skip_entry *entry;
    5943             : 
    5944             :         /* The controller's destruction was already started */
    5945          57 :         if (nvme_ctrlr->destruct) {
    5946           0 :                 return -EALREADY;
    5947             :         }
    5948             : 
    5949          57 :         if (!hotplug &&
    5950          57 :             nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
    5951           0 :                 entry = calloc(1, sizeof(*entry));
    5952           0 :                 if (!entry) {
    5953           0 :                         return -ENOMEM;
    5954             :                 }
    5955           0 :                 entry->trid = nvme_ctrlr->active_path_id->trid;
    5956           0 :                 TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
    5957           0 :         }
    5958             : 
    5959          57 :         nvme_ctrlr->destruct = true;
    5960          57 :         return 0;
    5961          57 : }
    5962             : 
    5963             : static int
    5964           2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    5965             : {
    5966             :         int rc;
    5967             : 
    5968           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5969           2 :         rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
    5970           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5971             : 
    5972           2 :         if (rc == 0) {
    5973           2 :                 _nvme_ctrlr_destruct(nvme_ctrlr);
    5974           2 :         } else if (rc == -EALREADY) {
    5975           0 :                 rc = 0;
    5976           0 :         }
    5977             : 
    5978           2 :         return rc;
    5979             : }
    5980             : 
    5981             : static void
    5982           0 : remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
    5983             : {
    5984           0 :         struct nvme_ctrlr *nvme_ctrlr = cb_ctx;
    5985             : 
    5986           0 :         bdev_nvme_delete_ctrlr(nvme_ctrlr, true);
    5987           0 : }
    5988             : 
    5989             : static int
    5990           0 : bdev_nvme_hotplug_probe(void *arg)
    5991             : {
    5992           0 :         if (g_hotplug_probe_ctx == NULL) {
    5993           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    5994           0 :                 return SPDK_POLLER_IDLE;
    5995             :         }
    5996             : 
    5997           0 :         if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
    5998           0 :                 g_hotplug_probe_ctx = NULL;
    5999           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    6000           0 :         }
    6001             : 
    6002           0 :         return SPDK_POLLER_BUSY;
    6003           0 : }
    6004             : 
    6005             : static int
    6006           0 : bdev_nvme_hotplug(void *arg)
    6007             : {
    6008             :         struct spdk_nvme_transport_id trid_pcie;
    6009             : 
    6010           0 :         if (g_hotplug_probe_ctx) {
    6011           0 :                 return SPDK_POLLER_BUSY;
    6012             :         }
    6013             : 
    6014           0 :         memset(&trid_pcie, 0, sizeof(trid_pcie));
    6015           0 :         spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
    6016             : 
    6017           0 :         g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
    6018             :                               hotplug_probe_cb, attach_cb, NULL);
    6019             : 
    6020           0 :         if (g_hotplug_probe_ctx) {
    6021           0 :                 assert(g_hotplug_probe_poller == NULL);
    6022           0 :                 g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
    6023           0 :         }
    6024             : 
    6025           0 :         return SPDK_POLLER_BUSY;
    6026           0 : }
    6027             : 
    6028             : void
    6029           0 : bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
    6030             : {
    6031           0 :         *opts = g_opts;
    6032           0 : }
    6033             : 
    6034             : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
    6035             :                 uint32_t reconnect_delay_sec,
    6036             :                 uint32_t fast_io_fail_timeout_sec);
    6037             : 
    6038             : static int
    6039           0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
    6040             : {
    6041           0 :         if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
    6042             :                 /* Can't set timeout_admin_us without also setting timeout_us */
    6043           0 :                 SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
    6044           0 :                 return -EINVAL;
    6045             :         }
    6046             : 
    6047           0 :         if (opts->bdev_retry_count < -1) {
    6048           0 :                 SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
    6049           0 :                 return -EINVAL;
    6050             :         }
    6051             : 
    6052           0 :         if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
    6053           0 :                         opts->reconnect_delay_sec,
    6054           0 :                         opts->fast_io_fail_timeout_sec)) {
    6055           0 :                 return -EINVAL;
    6056             :         }
    6057             : 
    6058           0 :         return 0;
    6059           0 : }
    6060             : 
    6061             : int
    6062           0 : bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
    6063             : {
    6064             :         int ret;
    6065             : 
    6066           0 :         ret = bdev_nvme_validate_opts(opts);
    6067           0 :         if (ret) {
    6068           0 :                 SPDK_WARNLOG("Failed to set nvme opts.\n");
    6069           0 :                 return ret;
    6070             :         }
    6071             : 
    6072           0 :         if (g_bdev_nvme_init_thread != NULL) {
    6073           0 :                 if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    6074           0 :                         return -EPERM;
    6075             :                 }
    6076           0 :         }
    6077             : 
    6078           0 :         if (opts->rdma_srq_size != 0 ||
    6079           0 :             opts->rdma_max_cq_size != 0 ||
    6080           0 :             opts->rdma_cm_event_timeout_ms != 0) {
    6081             :                 struct spdk_nvme_transport_opts drv_opts;
    6082             : 
    6083           0 :                 spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
    6084           0 :                 if (opts->rdma_srq_size != 0) {
    6085           0 :                         drv_opts.rdma_srq_size = opts->rdma_srq_size;
    6086           0 :                 }
    6087           0 :                 if (opts->rdma_max_cq_size != 0) {
    6088           0 :                         drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
    6089           0 :                 }
    6090           0 :                 if (opts->rdma_cm_event_timeout_ms != 0) {
    6091           0 :                         drv_opts.rdma_cm_event_timeout_ms = opts->rdma_cm_event_timeout_ms;
    6092           0 :                 }
    6093             : 
    6094           0 :                 ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
    6095           0 :                 if (ret) {
    6096           0 :                         SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
    6097           0 :                         return ret;
    6098             :                 }
    6099           0 :         }
    6100             : 
    6101           0 :         g_opts = *opts;
    6102             : 
    6103           0 :         return 0;
    6104           0 : }
    6105             : 
    6106             : struct set_nvme_hotplug_ctx {
    6107             :         uint64_t period_us;
    6108             :         bool enabled;
    6109             :         spdk_msg_fn fn;
    6110             :         void *fn_ctx;
    6111             : };
    6112             : 
    6113             : static void
    6114           0 : set_nvme_hotplug_period_cb(void *_ctx)
    6115             : {
                               /* Message handler queued by bdev_nvme_set_hotplug(): swaps the
                                * poller held in g_hotplug_poller, records the new settings in the
                                * globals, runs the optional completion callback and frees _ctx.
                                */
    6116           0 :         struct set_nvme_hotplug_ctx *ctx = _ctx;
    6117             : 
    6118           0 :         spdk_poller_unregister(&g_hotplug_poller);
    6119           0 :         if (ctx->enabled) {
    6120           0 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
    6121           0 :         } else {
                                       /* Hotplug disabled: bdev_nvme_remove_poller is registered
                                        * instead, always at the default period rather than
                                        * ctx->period_us.
                                        */
    6122           0 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
    6123             :                                                         NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
    6124             :         }
    6125             : 
                               /* The requested period is recorded even when hotplug is disabled. */
    6126           0 :         g_nvme_hotplug_poll_period_us = ctx->period_us;
    6127           0 :         g_nvme_hotplug_enabled = ctx->enabled;
    6128           0 :         if (ctx->fn) {
    6129           0 :                 ctx->fn(ctx->fn_ctx);
    6130           0 :         }
    6131             : 
    6132           0 :         free(ctx);
    6133           0 : }
    6134             : 
    6135             : int
    6136           0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
    6137             : {
                               /* Request a change of the hotplug poller configuration.
                                *
                                * The change itself is applied by set_nvme_hotplug_period_cb(), which
                                * is sent as a message to g_bdev_nvme_init_thread; cb (may be NULL) is
                                * invoked with cb_ctx from that handler.
                                *
                                * Returns 0 once the message has been queued, -EPERM when hotplug is
                                * being enabled from a process that is not the primary one, and
                                * -ENOMEM when the request context cannot be allocated.
                                */
    6138             :         struct set_nvme_hotplug_ctx *ctx;
    6139             : 
    6140           0 :         if (enabled == true && !spdk_process_is_primary()) {
    6141           0 :                 return -EPERM;
    6142             :         }
    6143             : 
    6144           0 :         ctx = calloc(1, sizeof(*ctx));
    6145           0 :         if (ctx == NULL) {
    6146           0 :                 return -ENOMEM;
    6147             :         }
    6148             : 
                               /* A period of 0 selects the default; larger values are capped at
                                * NVME_HOTPLUG_POLL_PERIOD_MAX.
                                */
    6149           0 :         period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
    6150           0 :         ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
    6151           0 :         ctx->enabled = enabled;
    6152           0 :         ctx->fn = cb;
    6153           0 :         ctx->fn_ctx = cb_ctx;
    6154             : 
                               /* NOTE(review): the return value of spdk_thread_send_msg() is not
                                * checked; if it can fail here, ctx would leak and cb would never
                                * run - confirm.
                                */
    6155           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
    6156           0 :         return 0;
    6157           0 : }
    6158             : 
    6159             : static void
    6160          46 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
    6161             :                                     struct nvme_async_probe_ctx *ctx)
    6162             : {
                               /* Report the bdev names of nvme_ctrlr's active namespaces through
                                * ctx->names and complete the request via populate_namespaces_cb().
                                *
                                * - ctx->names == NULL: nothing is reported, completion status 0.
                                * - more active namespaces than ctx->max_bdevs: completion status
                                *   -ERANGE with reported_bdevs reset to 0.
                                * - otherwise: reported_bdevs is the number of names stored,
                                *   completion status 0.
                                */
    6163             :         struct nvme_ns  *nvme_ns;
    6164             :         struct nvme_bdev        *nvme_bdev;
    6165             :         size_t                  j;
    6166             : 
    6167          46 :         assert(nvme_ctrlr != NULL);
    6168             : 
    6169          46 :         if (ctx->names == NULL) {
    6170           0 :                 ctx->reported_bdevs = 0;
    6171           0 :                 populate_namespaces_cb(ctx, 0);
    6172           0 :                 return;
    6173             :         }
    6174             : 
    6175             :         /*
    6176             :          * Report the new bdevs that were created in this call.
    6177             :          * There can be more than one bdev per NVMe controller.
    6178             :          */
    6179          46 :         j = 0;
    6180          46 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    6181          94 :         while (nvme_ns != NULL) {
    6182          48 :                 nvme_bdev = nvme_ns->bdev;
    6183          48 :                 if (j < ctx->max_bdevs) {
                                               /* The name pointer is borrowed from the bdev, not copied. */
    6184          48 :                         ctx->names[j] = nvme_bdev->disk.name;
    6185          48 :                         j++;
    6186          48 :                 } else {
                                               /* max_bdevs is filled from a uint32_t count in
                                                * spdk_bdev_nvme_create(), hence %u (the previous "%du"
                                                * printed a stray 'u' after the number).
                                                */
    6187           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr,
    6188             :                                           "Maximum number of namespaces supported per NVMe controller is %u. "
    6189             :                                           "Unable to return all names of created bdevs\n",
    6190             :                                           ctx->max_bdevs);
    6191           0 :                         ctx->reported_bdevs = 0;
    6192           0 :                         populate_namespaces_cb(ctx, -ERANGE);
    6193           0 :                         return;
    6194             :                 }
    6195             : 
    6196          48 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    6197             :         }
    6198             : 
    6199          46 :         ctx->reported_bdevs = j;
    6200          46 :         populate_namespaces_cb(ctx, 0);
    6201          46 : }
    6202             : 
    6203             : static int
    6204           9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    6205             :                                struct spdk_nvme_ctrlr *new_ctrlr,
    6206             :                                struct spdk_nvme_transport_id *trid)
    6207             : {
                               /* Validate that trid may be added to nvme_ctrlr as a failover path.
                                *
                                * Returns 0 when acceptable, -ENOTSUP for a PCIe trid, -EINVAL when
                                * the transport type or subsystem NQN differs from the active path,
                                * and -EALREADY when an identical trid is already in
                                * nvme_ctrlr->trids. new_ctrlr is not used by this function.
                                */
    6208             :         struct nvme_path_id *tmp_trid;
    6209             : 
    6210           9 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6211           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "PCIe failover is not supported.\n");
    6212           0 :                 return -ENOTSUP;
    6213             :         }
    6214             : 
    6215             :         /* Currently we only support failover to the same transport type. */
    6216           9 :         if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
    6217           0 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr,
    6218             :                                    "Failover from trtype: %s to a different trtype: %s is not supported currently\n",
    6219             :                                    spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
    6220             :                                    spdk_nvme_transport_id_trtype_str(trid->trtype));
    6221           0 :                 return -EINVAL;
    6222             :         }
    6223             : 
    6224             : 
    6225             :         /* Currently we only support failover to the same NQN. */
    6226           9 :         if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
    6227           0 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr,
    6228             :                                    "Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
    6229             :                                    nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
    6230           0 :                 return -EINVAL;
    6231             :         }
    6232             : 
    6233             :         /* Skip all the other checks if we've already registered this path. */
    6234          21 :         TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
                                       /* spdk_nvme_transport_id_compare() == 0 means the trids match. */
    6235          12 :                 if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
    6236           0 :                         NVME_CTRLR_WARNLOG(nvme_ctrlr, "This path (traddr: %s subnqn: %s) is already registered\n",
    6237             :                                            trid->traddr, trid->subnqn);
    6238           0 :                         return -EALREADY;
    6239             :                 }
    6240          12 :         }
    6241             : 
    6242           9 :         return 0;
    6243           9 : }
    6244             : 
    6245             : static int
    6246           9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
    6247             :                                     struct spdk_nvme_ctrlr *new_ctrlr)
    6248             : {
                               /* Check that every active namespace of nvme_ctrlr has a matching
                                * namespace (same id, equal per bdev_nvme_compare_ns()) on new_ctrlr.
                                *
                                * Returns 0 when all of them match (or there are none) and -EINVAL
                                * on the first mismatch.
                                */
    6249             :         struct nvme_ns *nvme_ns;
    6250             :         struct spdk_nvme_ns *new_ns;
    6251             : 
    6252           9 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    6253           9 :         while (nvme_ns != NULL) {
    6254           0 :                 new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
                                       /* NOTE(review): new_ns is only guarded by assert(); with asserts
                                        * compiled out a NULL would reach bdev_nvme_compare_ns() -
                                        * confirm that is safe or that NULL cannot happen here.
                                        */
    6255           0 :                 assert(new_ns != NULL);
    6256             : 
    6257           0 :                 if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
    6258           0 :                         return -EINVAL;
    6259             :                 }
    6260             : 
    6261           0 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    6262             :         }
    6263             : 
    6264           9 :         return 0;
    6265           9 : }
    6266             : 
    6267             : static int
    6268           9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    6269             :                               struct spdk_nvme_transport_id *trid)
    6270             : {
                               /* Allocate a path entry holding a copy of trid and link it into
                                * nvme_ctrlr->trids behind the active path: before the first
                                * alternate path that is marked failed, or at the tail if none is.
                                *
                                * Returns 0 on success and -ENOMEM if the entry cannot be allocated.
                                */
    6271             :         struct nvme_path_id *active_id, *new_trid, *tmp_trid;
    6272             : 
    6273           9 :         new_trid = calloc(1, sizeof(*new_trid));
    6274           9 :         if (new_trid == NULL) {
    6275           0 :                 return -ENOMEM;
    6276             :         }
    6277           9 :         new_trid->trid = *trid;
    6278             : 
    6279           9 :         active_id = nvme_ctrlr->active_path_id;
    6280           9 :         assert(active_id != NULL);
    6281           9 :         assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
    6282             : 
    6283             :         /* Skip the active trid not to replace it until it is failed. */
    6284           9 :         tmp_trid = TAILQ_NEXT(active_id, link);
    6285           9 :         if (tmp_trid == NULL) {
    6286           6 :                 goto add_tail;
    6287             :         }
    6288             : 
    6289             :         /* It means the trid is failed if its last failed time is non-zero.
    6290             :          * Insert the new alternate trid before any failed trid.
    6291             :          */
    6292           5 :         TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
    6293           3 :                 if (tmp_trid->last_failed_tsc != 0) {
    6294           1 :                         TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
    6295           1 :                         return 0;
    6296             :                 }
    6297           4 :         }
    6298             : 
    6299             : add_tail:
    6300           8 :         TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
    6301           8 :         return 0;
    6302           9 : }
    6303             : 
    6304             : /* This is the case that a secondary path is added to an existing
    6305             :  * nvme_ctrlr for failover. After checking if it can access the same
    6306             :  * namespaces as the primary path, it is disconnected until failover occurs.
                        *
                        * The trid check, the namespace check and the list insertion all run
                        * with nvme_ctrlr->mutex held. new_ctrlr is passed to spdk_nvme_detach()
                        * on every path, whether the steps succeeded or not.
                        *
                        * Returns 0 on success, otherwise the negative errno of the first step
                        * that failed.
    6307             :  */
    6308             : static int
    6309           9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    6310             :                              struct spdk_nvme_ctrlr *new_ctrlr,
    6311             :                              struct spdk_nvme_transport_id *trid)
    6312             : {
    6313             :         int rc;
    6314             : 
    6315           9 :         assert(nvme_ctrlr != NULL);
    6316             : 
    6317           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6318             : 
    6319           9 :         rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
    6320           9 :         if (rc != 0) {
    6321           0 :                 goto exit;
    6322             :         }
    6323             : 
    6324           9 :         rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
    6325           9 :         if (rc != 0) {
    6326           0 :                 goto exit;
    6327             :         }
    6328             : 
    6329           9 :         rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
    6330             : 
    6331             : exit:
    6332           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6333             : 
                               /* Only the trid is kept; the probing connection is dropped here. */
    6334           9 :         spdk_nvme_detach(new_ctrlr);
    6335             : 
    6336           9 :         return rc;
    6337             : }
    6338             : 
    6339             : static void
    6340          47 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6341             :                   struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    6342             : {
                               /* Attach callback chosen by spdk_bdev_nvme_create() when a new
                                * nvme_ctrlr is to be created for the connected controller.
                                *
                                * cb_ctx is treated as the drv_opts member embedded in a
                                * struct nvme_async_probe_ctx; the enclosing context is recovered
                                * with SPDK_CONTAINEROF. The trid and opts arguments are unused;
                                * ctx->trid is passed to nvme_ctrlr_create() instead.
                                */
    6343          47 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6344             :         struct nvme_async_probe_ctx *ctx;
    6345             :         int rc;
    6346             : 
    6347          47 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
                               /* Lets bdev_nvme_async_poll() tell "attached" from "probe failed". */
    6348          47 :         ctx->ctrlr_attached = true;
    6349             : 
    6350          47 :         rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
    6351          47 :         if (rc != 0) {
                                       /* Creation failed: complete the request here with zero bdevs. */
    6352           1 :                 ctx->reported_bdevs = 0;
    6353           1 :                 populate_namespaces_cb(ctx, rc);
    6354           1 :         }
    6355          47 : }
    6356             : 
    6357             : 
    6358             : static void
    6359           4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6360             :                         struct spdk_nvme_ctrlr *ctrlr,
    6361             :                         const struct spdk_nvme_ctrlr_opts *opts)
    6362             : {
                               /* Attach callback chosen by spdk_bdev_nvme_create() when the
                                * connected controller is only an additional (failover) path for an
                                * existing nvme_ctrlr named ctx->base_name.
                                *
                                * cb_ctx is treated as the drv_opts member embedded in a
                                * struct nvme_async_probe_ctx. The request is always completed from
                                * here with zero reported bdevs and the status of the add step.
                                */
    6363           4 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6364             :         struct nvme_ctrlr *nvme_ctrlr;
    6365             :         struct nvme_async_probe_ctx *ctx;
    6366             :         int rc;
    6367             : 
    6368           4 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
    6369           4 :         ctx->ctrlr_attached = true;
    6370             : 
    6371           4 :         nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
    6372           4 :         if (nvme_ctrlr) {
                                       /* bdev_nvme_add_secondary_trid() also detaches ctrlr. */
    6373           4 :                 rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
    6374           4 :         } else {
                                       /* NOTE(review): ctrlr is not detached on this path - confirm
                                        * whether something else releases it.
                                        */
    6375           0 :                 rc = -ENODEV;
    6376             :         }
    6377             : 
    6378           4 :         ctx->reported_bdevs = 0;
    6379           4 :         populate_namespaces_cb(ctx, rc);
    6380           4 : }
    6381             : 
    6382             : static int
    6383          52 : bdev_nvme_async_poll(void *arg)
    6384             : {
                               /* Poller registered by spdk_bdev_nvme_create() that drives the
                                * asynchronous probe in ctx->probe_ctx.
                                *
                                * Any result other than -EAGAIN from spdk_nvme_probe_poll_async()
                                * ends the probe: probe_done is set and the poller unregisters
                                * itself. Always returns SPDK_POLLER_BUSY.
                                */
    6385          52 :         struct nvme_async_probe_ctx     *ctx = arg;
    6386             :         int                             rc;
    6387             : 
    6388          52 :         rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    6389          52 :         if (spdk_unlikely(rc != -EAGAIN)) {
    6390          52 :                 ctx->probe_done = true;
    6391          52 :                 spdk_poller_unregister(&ctx->poller);
    6392          52 :                 if (!ctx->ctrlr_attached) {
    6393             :                         /* The probe is done, but no controller was attached.
    6394             :                          * That means we had a failure, so report -EIO back to
    6395             :                          * the caller (usually the RPC). populate_namespaces_cb()
    6396             :                          * will take care of freeing the nvme_async_probe_ctx.
    6397             :                          */
    6398           1 :                         ctx->reported_bdevs = 0;
    6399           1 :                         populate_namespaces_cb(ctx, -EIO);
    6400          52 :                 } else if (ctx->namespaces_populated) {
    6401             :                         /* The namespaces for the attached controller were all
    6402             :                          * populated and the response was already sent to the
    6403             :                          * caller (usually the RPC).  So free the context here.
    6404             :                          */
    6405          21 :                         free_nvme_async_probe_ctx(ctx);
    6406          21 :                 }
                                       /* Attached but not yet populated: ctx is left alone here;
                                        * presumably the populate completion path releases it once
                                        * it sees probe_done - confirm.
                                        */
    6407          52 :         }
    6408             : 
    6409          52 :         return SPDK_POLLER_BUSY;
    6410             : }
    6411             : 
    6412             : static bool
    6413          71 : bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
    6414             :                 uint32_t reconnect_delay_sec,
    6415             :                 uint32_t fast_io_fail_timeout_sec)
    6416             : {
                               /* Validate a combination of the three I/O error resiliency timeouts.
                                * Returns true when the combination is accepted; otherwise logs the
                                * violated rule and returns false.
                                *
                                * Accepted combinations, by ctrlr_loss_timeout_sec:
                                *   < -1 : never accepted.
                                *   -1   : reconnect_delay_sec != 0, and fast_io_fail_timeout_sec is
                                *          either 0 or >= reconnect_delay_sec.
                                *   > 0  : 0 < reconnect_delay_sec <= ctrlr_loss_timeout_sec, and
                                *          fast_io_fail_timeout_sec is either 0 or within
                                *          [reconnect_delay_sec, ctrlr_loss_timeout_sec].
                                *   0    : both other parameters must be 0.
                                */
    6417          71 :         if (ctrlr_loss_timeout_sec < -1) {
    6418           1 :                 SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
    6419           1 :                 return false;
    6420          70 :         } else if (ctrlr_loss_timeout_sec == -1) {
    6421          14 :                 if (reconnect_delay_sec == 0) {
    6422           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
    6423           1 :                         return false;
    6424          13 :                 } else if (fast_io_fail_timeout_sec != 0 &&
    6425           3 :                            fast_io_fail_timeout_sec < reconnect_delay_sec) {
                                               /* Parameter name spelled with underscores, matching the
                                                * equivalent message in the finite-timeout branch below.
                                                */
    6426           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
    6427           1 :                         return false;
    6428             :                 }
    6429          68 :         } else if (ctrlr_loss_timeout_sec != 0) {
                                       /* Positive here, so the uint32_t casts below are value-preserving. */
    6430          11 :                 if (reconnect_delay_sec == 0) {
    6431           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
    6432           1 :                         return false;
    6433          10 :                 } else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
    6434           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
    6435           1 :                         return false;
    6436           9 :                 } else if (fast_io_fail_timeout_sec != 0) {
    6437           6 :                         if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
    6438           1 :                                 SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
    6439           1 :                                 return false;
    6440           5 :                         } else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
    6441           1 :                                 SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
    6442           1 :                                 return false;
    6443             :                         }
    6444           4 :                 }
    6445          52 :         } else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
    6446           2 :                 SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
    6447           2 :                 return false;
    6448             :         }
    6449             : 
    6450          62 :         return true;
    6451          71 : }
    6452             : 
    6453             : int
    6454          52 : spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
    6455             :                       const char *base_name,
    6456             :                       const char **names,
    6457             :                       uint32_t count,
    6458             :                       spdk_bdev_nvme_create_cb cb_fn,
    6459             :                       void *cb_ctx,
    6460             :                       struct spdk_nvme_ctrlr_opts *drv_opts,
    6461             :                       struct spdk_bdev_nvme_ctrlr_opts *bdev_opts)
    6462             : {
                               /* Start an asynchronous connect to the controller described by trid
                                * and arrange for it to be attached under the name base_name, either
                                * as a new nvme_ctrlr or as a failover path of an existing one.
                                *
                                * trid       Target to connect to; copied into the probe context.
                                * base_name  Controller name, 1..SPDK_CONTROLLER_NAME_MAX-1 characters.
                                *            The pointer itself is stored and read later by the attach
                                *            callbacks - NOTE(review): the caller apparently has to
                                *            keep it valid until completion; confirm.
                                * names      Array stored for reporting bdev names (may be NULL, see
                                *            nvme_ctrlr_populate_namespaces_done()).
                                * count      Capacity of names.
                                * cb_fn      Completion callback, stored together with cb_ctx.
                                * drv_opts   NVMe driver options; copied, then partly overridden from
                                *            g_opts. NOTE(review): dereferenced unconditionally,
                                *            unlike bdev_opts it is not allowed to be NULL.
                                * bdev_opts  bdev-level options; NULL selects the defaults.
                                *
                                * Returns 0 when the probe was started, otherwise -EEXIST, -EINVAL,
                                * -ENOMEM, -ENOKEY or -ENODEV as set on the individual paths below.
                                */
    6463             :         struct nvme_probe_skip_entry *entry, *tmp;
    6464             :         struct nvme_async_probe_ctx *ctx;
    6465             :         spdk_nvme_attach_cb attach_cb;
    6466             :         struct nvme_ctrlr *nvme_ctrlr;
    6467             :         int len;
    6468             : 
    6469             :         /* TODO expand this check to include both the host and target TRIDs.
    6470             :          * Only if both are the same should we fail.
    6471             :          */
    6472          52 :         if (nvme_ctrlr_get(trid, drv_opts->hostnqn) != NULL) {
    6473           0 :                 SPDK_ERRLOG("A controller with the provided trid (traddr: %s, hostnqn: %s) "
    6474             :                             "already exists.\n", trid->traddr, drv_opts->hostnqn);
    6475           0 :                 return -EEXIST;
    6476             :         }
    6477             : 
                               /* strnlen() stops at SPDK_CONTROLLER_NAME_MAX, so that value means
                                * the name is too long (no terminator found within the limit).
                                */
    6478          52 :         len = strnlen(base_name, SPDK_CONTROLLER_NAME_MAX);
    6479             : 
    6480          52 :         if (len == 0 || len == SPDK_CONTROLLER_NAME_MAX) {
    6481           0 :                 SPDK_ERRLOG("controller name must be between 1 and %d characters\n", SPDK_CONTROLLER_NAME_MAX - 1);
    6482           0 :                 return -EINVAL;
    6483             :         }
    6484             : 
    6485          52 :         if (bdev_opts != NULL &&
    6486         104 :             !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
    6487          52 :                             bdev_opts->reconnect_delay_sec,
    6488          52 :                             bdev_opts->fast_io_fail_timeout_sec)) {
    6489           0 :                 return -EINVAL;
    6490             :         }
    6491             : 
    6492          52 :         ctx = calloc(1, sizeof(*ctx));
    6493          52 :         if (!ctx) {
    6494           0 :                 return -ENOMEM;
    6495             :         }
    6496          52 :         ctx->base_name = base_name;
    6497          52 :         ctx->names = names;
    6498          52 :         ctx->max_bdevs = count;
    6499          52 :         ctx->cb_fn = cb_fn;
    6500          52 :         ctx->cb_ctx = cb_ctx;
    6501          52 :         ctx->trid = *trid;
    6502             : 
    6503          52 :         if (bdev_opts) {
    6504          52 :                 memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    6505          52 :         } else {
    6506           0 :                 spdk_bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
    6507             :         }
    6508             : 
                               /* An explicit create for a PCIe trid drops any matching entry from
                                * g_skipped_nvme_ctrlrs.
                                */
    6509          52 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6510           0 :                 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
    6511           0 :                         if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    6512           0 :                                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    6513           0 :                                 free(entry);
    6514           0 :                                 break;
    6515             :                         }
    6516           0 :                 }
    6517           0 :         }
    6518             : 
                               /* Start from the caller's driver options, then force the module-wide
                                * settings from g_opts and disable reading the ANA log page.
                                */
    6519          52 :         memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    6520          52 :         ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
    6521          52 :         ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
    6522          52 :         ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
    6523          52 :         ctx->drv_opts.disable_read_ana_log_page = true;
    6524          52 :         ctx->drv_opts.transport_tos = g_opts.transport_tos;
    6525             : 
                               /* Each configured key name is resolved through the keyring; a name
                                * that cannot be resolved fails the whole request with -ENOKEY.
                                */
    6526          52 :         if (ctx->bdev_opts.psk != NULL) {
    6527           0 :                 ctx->drv_opts.tls_psk = spdk_keyring_get_key(ctx->bdev_opts.psk);
    6528           0 :                 if (ctx->drv_opts.tls_psk == NULL) {
    6529           0 :                         SPDK_ERRLOG("Could not load PSK: %s\n", ctx->bdev_opts.psk);
    6530           0 :                         free_nvme_async_probe_ctx(ctx);
    6531           0 :                         return -ENOKEY;
    6532             :                 }
    6533           0 :         }
    6534             : 
    6535          52 :         if (ctx->bdev_opts.dhchap_key != NULL) {
    6536           0 :                 ctx->drv_opts.dhchap_key = spdk_keyring_get_key(ctx->bdev_opts.dhchap_key);
    6537           0 :                 if (ctx->drv_opts.dhchap_key == NULL) {
    6538           0 :                         SPDK_ERRLOG("Could not load DH-HMAC-CHAP key: %s\n",
    6539             :                                     ctx->bdev_opts.dhchap_key);
    6540           0 :                         free_nvme_async_probe_ctx(ctx);
    6541           0 :                         return -ENOKEY;
    6542             :                 }
    6543             : 
                                       /* Digests and DH groups are only set when a DH-HMAC-CHAP key is
                                        * configured.
                                        */
    6544           0 :                 ctx->drv_opts.dhchap_digests = g_opts.dhchap_digests;
    6545           0 :                 ctx->drv_opts.dhchap_dhgroups = g_opts.dhchap_dhgroups;
    6546           0 :         }
    6547          52 :         if (ctx->bdev_opts.dhchap_ctrlr_key != NULL) {
    6548           0 :                 ctx->drv_opts.dhchap_ctrlr_key =
    6549           0 :                         spdk_keyring_get_key(ctx->bdev_opts.dhchap_ctrlr_key);
    6550           0 :                 if (ctx->drv_opts.dhchap_ctrlr_key == NULL) {
    6551           0 :                         SPDK_ERRLOG("Could not load DH-HMAC-CHAP controller key: %s\n",
    6552             :                                     ctx->bdev_opts.dhchap_ctrlr_key);
    6553           0 :                         free_nvme_async_probe_ctx(ctx);
    6554           0 :                         return -ENOKEY;
    6555             :                 }
    6556           0 :         }
    6557             : 
                               /* A new name, or multipath mode, creates a separate nvme_ctrlr;
                                * an existing name without multipath only adds a failover path.
                                */
    6558          52 :         if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || ctx->bdev_opts.multipath) {
    6559          48 :                 attach_cb = connect_attach_cb;
    6560          48 :         } else {
    6561           4 :                 attach_cb = connect_set_failover_cb;
    6562             :         }
    6563             : 
    6564          52 :         nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
    6565          52 :         if (nvme_ctrlr  && nvme_ctrlr->opts.multipath != ctx->bdev_opts.multipath) {
    6566             :                 /* All controllers with the same name must be configured the same
    6567             :                  * way, either for multipath or failover. If the configuration doesn't
    6568             :                  * match - report error.
    6569             :                  */
    6570           0 :                 free_nvme_async_probe_ctx(ctx);
    6571           0 :                 return -EINVAL;
    6572             :         }
    6573             : 
                               /* &ctx->drv_opts is what the attach callbacks receive as cb_ctx and
                                * turn back into ctx with SPDK_CONTAINEROF.
                                */
    6574          52 :         ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
    6575          52 :         if (ctx->probe_ctx == NULL) {
    6576           0 :                 SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
    6577           0 :                 free_nvme_async_probe_ctx(ctx);
    6578           0 :                 return -ENODEV;
    6579             :         }
                               /* From here on ctx is owned by the poller / completion path. */
    6580          52 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
    6581             : 
    6582          52 :         return 0;
    6583          52 : }
    6584             : 
    6585             : struct bdev_nvme_delete_ctx {
                               /* State of a delete request; released with
                                * free_bdev_nvme_delete_ctx().
                                */
                               /* Heap-allocated string, freed together with the context. */
    6586             :         char                        *name;
                               /* Path description; presumably the filter of paths to delete -
                                * verify against the users of this struct.
                                */
    6587             :         struct nvme_path_id         path_id;
                               /* Completion callback and its argument. */
    6588             :         bdev_nvme_delete_done_fn    delete_done;
    6589             :         void                        *delete_done_ctx;
                               /* NOTE(review): looks like a tick-count deadline checked from
                                * poller - confirm against the code that fills these in.
                                */
    6590             :         uint64_t                    timeout_ticks;
    6591             :         struct spdk_poller          *poller;
    6592             : };
    6593             : 
    6594             : static void
    6595           2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
    6596             : {
                               /* Release ctx together with its owned name string; a NULL ctx is
                                * accepted and ignored.
                                */
    6597           2 :         if (ctx != NULL) {
    6598           1 :                 free(ctx->name);
    6599           1 :                 free(ctx);
    6600           1 :         }
    6601           2 : }
    6602             : 
    6603             : static bool
    6604          75 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
    6605             : {
                               /* Test whether path p matches the filter path_id.
                                *
                                * Only the fields that are set in path_id are compared: a zero
                                * trtype or adrfam and an all-zero string buffer act as wildcards.
                                * trstring, traddr and trsvcid are compared case-insensitively;
                                * subnqn, hostaddr and hostsvcid are compared case-sensitively.
                                *
                                * Returns true when every set field of path_id matches p.
                                */
    6606          75 :         if (path_id->trid.trtype != 0) {
                                       /* Custom transports are told apart by their transport string,
                                        * not by the numeric type.
                                        */
    6607          21 :                 if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
    6608           0 :                         if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
    6609           0 :                                 return false;
    6610             :                         }
    6611           0 :                 } else {
    6612          21 :                         if (path_id->trid.trtype != p->trid.trtype) {
    6613           0 :                                 return false;
    6614             :                         }
    6615             :                 }
    6616          21 :         }
    6617             : 
    6618          75 :         if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
    6619          21 :                 if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
    6620          11 :                         return false;
    6621             :                 }
    6622          10 :         }
    6623             : 
    6624          64 :         if (path_id->trid.adrfam != 0) {
    6625           0 :                 if (path_id->trid.adrfam != p->trid.adrfam) {
    6626           0 :                         return false;
    6627             :                 }
    6628           0 :         }
    6629             : 
    6630          64 :         if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
    6631          10 :                 if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
    6632           0 :                         return false;
    6633             :                 }
    6634          10 :         }
    6635             : 
    6636          64 :         if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
    6637          10 :                 if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
    6638           0 :                         return false;
    6639             :                 }
    6640          10 :         }
    6641             : 
    6642          64 :         if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
    6643           0 :                 if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
    6644           0 :                         return false;
    6645             :                 }
    6646           0 :         }
    6647             : 
    6648          64 :         if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
    6649           0 :                 if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
    6650           0 :                         return false;
    6651             :                 }
    6652           0 :         }
    6653             : 
    6654          64 :         return true;
    6655          75 : }
    6656             : 
    6657             : static bool
    6658           2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
    6659             : {
    6660             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    6661             :         struct nvme_ctrlr       *ctrlr;
    6662             :         struct nvme_path_id     *p;
    6663             : 
    6664           2 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    6665           2 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    6666           2 :         if (!nbdev_ctrlr) {
    6667           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6668           1 :                 return false;
    6669             :         }
    6670             : 
    6671           1 :         TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    6672           1 :                 pthread_mutex_lock(&ctrlr->mutex);
    6673           1 :                 TAILQ_FOREACH(p, &ctrlr->trids, link) {
    6674           1 :                         if (nvme_path_id_compare(p, path_id)) {
    6675           1 :                                 pthread_mutex_unlock(&ctrlr->mutex);
    6676           1 :                                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6677           1 :                                 return true;
    6678             :                         }
    6679           0 :                 }
    6680           0 :                 pthread_mutex_unlock(&ctrlr->mutex);
    6681           0 :         }
    6682           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6683             : 
    6684           0 :         return false;
    6685           2 : }
    6686             : 
    6687             : static int
    6688           2 : bdev_nvme_delete_complete_poll(void *arg)
    6689             : {
    6690           2 :         struct bdev_nvme_delete_ctx     *ctx = arg;
    6691           2 :         int                             rc = 0;
    6692             : 
    6693           2 :         if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
    6694           1 :                 if (ctx->timeout_ticks > spdk_get_ticks()) {
    6695           1 :                         return SPDK_POLLER_BUSY;
    6696             :                 }
    6697             : 
    6698           0 :                 SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
    6699           0 :                 rc = -ETIMEDOUT;
    6700           0 :         }
    6701             : 
    6702           1 :         spdk_poller_unregister(&ctx->poller);
    6703             : 
    6704           1 :         ctx->delete_done(ctx->delete_done_ctx, rc);
    6705           1 :         free_bdev_nvme_delete_ctx(ctx);
    6706             : 
    6707           1 :         return SPDK_POLLER_BUSY;
    6708           2 : }
    6709             : 
    6710             : static int
    6711          64 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
    6712             : {
    6713             :         struct nvme_path_id     *p, *t;
    6714             :         spdk_msg_fn             msg_fn;
    6715          64 :         int                     rc = -ENXIO;
    6716             : 
    6717          64 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6718             : 
    6719          74 :         TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
    6720          74 :                 if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
    6721          64 :                         break;
    6722             :                 }
    6723             : 
    6724          10 :                 if (!nvme_path_id_compare(p, path_id)) {
    6725           3 :                         continue;
    6726             :                 }
    6727             : 
    6728             :                 /* We are not using the specified path. */
    6729           7 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
    6730           7 :                 free(p);
    6731           7 :                 rc = 0;
    6732           7 :         }
    6733             : 
    6734          64 :         if (p == NULL || !nvme_path_id_compare(p, path_id)) {
    6735           8 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6736           8 :                 return rc;
    6737             :         }
    6738             : 
    6739             :         /* If we made it here, then this path is a match! Now we need to remove it. */
    6740             : 
    6741             :         /* This is the active path in use right now. The active path is always the first in the list. */
    6742          56 :         assert(p == nvme_ctrlr->active_path_id);
    6743             : 
    6744          56 :         if (!TAILQ_NEXT(p, link)) {
    6745             :                 /* The current path is the only path. */
    6746          55 :                 msg_fn = _nvme_ctrlr_destruct;
    6747          55 :                 rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
    6748          55 :         } else {
    6749             :                 /* There is an alternative path. */
    6750           1 :                 msg_fn = _bdev_nvme_reset_ctrlr;
    6751           1 :                 rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
    6752             :         }
    6753             : 
    6754          56 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6755             : 
    6756          56 :         if (rc == 0) {
    6757          56 :                 spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    6758          56 :         } else if (rc == -EALREADY) {
    6759           0 :                 rc = 0;
    6760           0 :         }
    6761             : 
    6762          56 :         return rc;
    6763          64 : }
    6764             : 
    6765             : int
    6766          49 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
    6767             :                  bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
    6768             : {
    6769             :         struct nvme_bdev_ctrlr          *nbdev_ctrlr;
    6770             :         struct nvme_ctrlr               *nvme_ctrlr, *tmp_nvme_ctrlr;
    6771          49 :         struct bdev_nvme_delete_ctx     *ctx = NULL;
    6772          49 :         int                             rc = -ENXIO, _rc;
    6773             : 
    6774          49 :         if (name == NULL || path_id == NULL) {
    6775           0 :                 rc = -EINVAL;
    6776           0 :                 goto exit;
    6777             :         }
    6778             : 
    6779          49 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    6780             : 
    6781          49 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    6782          49 :         if (nbdev_ctrlr == NULL) {
    6783           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6784             : 
    6785           0 :                 SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
    6786           0 :                 rc = -ENODEV;
    6787           0 :                 goto exit;
    6788             :         }
    6789             : 
    6790         113 :         TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
    6791          64 :                 _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
    6792          64 :                 if (_rc < 0 && _rc != -ENXIO) {
    6793           0 :                         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6794           0 :                         rc = _rc;
    6795           0 :                         goto exit;
    6796          64 :                 } else if (_rc == 0) {
    6797             :                         /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
    6798             :                          * was deleted successfully. To remember the successful deletion,
    6799             :                          * overwrite rc only if _rc is zero.
    6800             :                          */
    6801          58 :                         rc = 0;
    6802          58 :                 }
    6803          64 :         }
    6804             : 
    6805          49 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6806             : 
    6807          49 :         if (rc != 0 || delete_done == NULL) {
    6808          48 :                 goto exit;
    6809             :         }
    6810             : 
    6811           1 :         ctx = calloc(1, sizeof(*ctx));
    6812           1 :         if (ctx == NULL) {
    6813           0 :                 SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
    6814           0 :                 rc = -ENOMEM;
    6815           0 :                 goto exit;
    6816             :         }
    6817             : 
    6818           1 :         ctx->name = strdup(name);
    6819           1 :         if (ctx->name == NULL) {
    6820           0 :                 SPDK_ERRLOG("Failed to copy controller name for deletion\n");
    6821           0 :                 rc = -ENOMEM;
    6822           0 :                 goto exit;
    6823             :         }
    6824             : 
    6825           1 :         ctx->delete_done = delete_done;
    6826           1 :         ctx->delete_done_ctx = delete_done_ctx;
    6827           1 :         ctx->path_id = *path_id;
    6828           1 :         ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
    6829           1 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
    6830           1 :         if (ctx->poller == NULL) {
    6831           0 :                 SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
    6832           0 :                 rc = -ENOMEM;
    6833           0 :                 goto exit;
    6834             :         }
    6835             : 
    6836             : exit:
    6837          49 :         if (rc != 0) {
    6838           1 :                 free_bdev_nvme_delete_ctx(ctx);
    6839           1 :         }
    6840             : 
    6841          49 :         return rc;
    6842             : }
    6843             : 
    6844             : #define DISCOVERY_INFOLOG(ctx, format, ...) \
    6845             :         SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    6846             : 
    6847             : #define DISCOVERY_ERRLOG(ctx, format, ...) \
    6848             :         SPDK_ERRLOG("Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    6849             : 
    6850             : struct discovery_entry_ctx {
    6851             :         char                                            name[128];
    6852             :         struct spdk_nvme_transport_id                   trid;
    6853             :         struct spdk_nvme_ctrlr_opts                     drv_opts;
    6854             :         struct spdk_nvmf_discovery_log_page_entry       entry;
    6855             :         TAILQ_ENTRY(discovery_entry_ctx)                tailq;
    6856             :         struct discovery_ctx                            *ctx;
    6857             : };
    6858             : 
    6859             : struct discovery_ctx {
    6860             :         char                                    *name;
    6861             :         spdk_bdev_nvme_start_discovery_fn       start_cb_fn;
    6862             :         spdk_bdev_nvme_stop_discovery_fn        stop_cb_fn;
    6863             :         void                                    *cb_ctx;
    6864             :         struct spdk_nvme_probe_ctx              *probe_ctx;
    6865             :         struct spdk_nvme_detach_ctx             *detach_ctx;
    6866             :         struct spdk_nvme_ctrlr                  *ctrlr;
    6867             :         struct spdk_nvme_transport_id           trid;
    6868             :         struct discovery_entry_ctx              *entry_ctx_in_use;
    6869             :         struct spdk_poller                      *poller;
    6870             :         struct spdk_nvme_ctrlr_opts             drv_opts;
    6871             :         struct spdk_bdev_nvme_ctrlr_opts        bdev_opts;
    6872             :         struct spdk_nvmf_discovery_log_page     *log_page;
    6873             :         TAILQ_ENTRY(discovery_ctx)              tailq;
    6874             :         TAILQ_HEAD(, discovery_entry_ctx)       nvm_entry_ctxs;
    6875             :         TAILQ_HEAD(, discovery_entry_ctx)       discovery_entry_ctxs;
    6876             :         int                                     rc;
    6877             :         bool                                    wait_for_attach;
    6878             :         uint64_t                                timeout_ticks;
    6879             :         /* Denotes that the discovery service is being started. We're waiting
    6880             :          * for the initial connection to the discovery controller to be
    6881             :          * established and attach discovered NVM ctrlrs.
    6882             :          */
    6883             :         bool                                    initializing;
    6884             :         /* Denotes if a discovery is currently in progress for this context.
    6885             :          * That includes connecting to newly discovered subsystems.  Used to
    6886             :          * ensure we do not start a new discovery until an existing one is
    6887             :          * complete.
    6888             :          */
    6889             :         bool                                    in_progress;
    6890             : 
    6891             :         /* Denotes if another discovery is needed after the one in progress
    6892             :          * completes.  Set when we receive an AER completion while a discovery
    6893             :          * is already in progress.
    6894             :          */
    6895             :         bool                                    pending;
    6896             : 
    6897             :         /* Signal to the discovery context poller that it should stop the
    6898             :          * discovery service, including detaching from the current discovery
    6899             :          * controller.
    6900             :          */
    6901             :         bool                                    stop;
    6902             : 
    6903             :         struct spdk_thread                      *calling_thread;
    6904             :         uint32_t                                index;
    6905             :         uint32_t                                attach_in_progress;
    6906             :         char                                    *hostnqn;
    6907             : 
    6908             :         /* Denotes if the discovery service was started by the mdns discovery.
    6909             :          */
    6910             :         bool                                    from_mdns_discovery_service;
    6911             : };
    6912             : 
    6913             : TAILQ_HEAD(discovery_ctxs, discovery_ctx);
    6914             : static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
    6915             : 
    6916             : static void get_discovery_log_page(struct discovery_ctx *ctx);
    6917             : 
    6918             : static void
    6919           0 : free_discovery_ctx(struct discovery_ctx *ctx)
    6920             : {
    6921           0 :         free(ctx->log_page);
    6922           0 :         free(ctx->hostnqn);
    6923           0 :         free(ctx->name);
    6924           0 :         free(ctx);
    6925           0 : }
    6926             : 
    6927             : static void
    6928           0 : discovery_complete(struct discovery_ctx *ctx)
    6929             : {
    6930           0 :         ctx->initializing = false;
    6931           0 :         ctx->in_progress = false;
    6932           0 :         if (ctx->pending) {
    6933           0 :                 ctx->pending = false;
    6934           0 :                 get_discovery_log_page(ctx);
    6935           0 :         }
    6936           0 : }
    6937             : 
    6938             : static void
    6939           0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
    6940             :                                struct spdk_nvmf_discovery_log_page_entry *entry)
    6941             : {
    6942             :         char *space;
    6943             : 
    6944           0 :         trid->trtype = entry->trtype;
    6945           0 :         trid->adrfam = entry->adrfam;
    6946           0 :         memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
    6947           0 :         memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
    6948             :         /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
    6949             :          * before call to this function trid->subnqn is zeroed out, we need
    6950             :          * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
    6951             :          * remains 0. Then we can shorten the string (replace ' ' with 0) if required
    6952             :          */
    6953           0 :         memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
    6954             : 
    6955             :         /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
    6956             :          * But the log page entries typically pad them with spaces, not zeroes.
    6957             :          * So add a NULL terminator to each of these fields at the appropriate
    6958             :          * location.
    6959             :          */
    6960           0 :         space = strchr(trid->traddr, ' ');
    6961           0 :         if (space) {
    6962           0 :                 *space = 0;
    6963           0 :         }
    6964           0 :         space = strchr(trid->trsvcid, ' ');
    6965           0 :         if (space) {
    6966           0 :                 *space = 0;
    6967           0 :         }
    6968           0 :         space = strchr(trid->subnqn, ' ');
    6969           0 :         if (space) {
    6970           0 :                 *space = 0;
    6971           0 :         }
    6972           0 : }
    6973             : 
    6974             : static void
    6975           0 : _stop_discovery(void *_ctx)
    6976             : {
    6977           0 :         struct discovery_ctx *ctx = _ctx;
    6978             : 
    6979           0 :         if (ctx->attach_in_progress > 0) {
    6980           0 :                 spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
    6981           0 :                 return;
    6982             :         }
    6983             : 
    6984           0 :         ctx->stop = true;
    6985             : 
    6986           0 :         while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
    6987             :                 struct discovery_entry_ctx *entry_ctx;
    6988           0 :                 struct nvme_path_id path = {};
    6989             : 
    6990           0 :                 entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
    6991           0 :                 path.trid = entry_ctx->trid;
    6992           0 :                 bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    6993           0 :                 TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6994           0 :                 free(entry_ctx);
    6995             :         }
    6996             : 
    6997           0 :         while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
    6998             :                 struct discovery_entry_ctx *entry_ctx;
    6999             : 
    7000           0 :                 entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    7001           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    7002           0 :                 free(entry_ctx);
    7003             :         }
    7004             : 
    7005           0 :         free(ctx->entry_ctx_in_use);
    7006           0 :         ctx->entry_ctx_in_use = NULL;
    7007           0 : }
    7008             : 
    7009             : static void
    7010           0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    7011             : {
    7012           0 :         ctx->stop_cb_fn = cb_fn;
    7013           0 :         ctx->cb_ctx = cb_ctx;
    7014             : 
    7015           0 :         if (ctx->attach_in_progress > 0) {
    7016           0 :                 DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
    7017             :                                   ctx->attach_in_progress);
    7018           0 :         }
    7019             : 
    7020           0 :         _stop_discovery(ctx);
    7021           0 : }
    7022             : 
    7023             : static void
    7024           2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
    7025             : {
    7026             :         struct discovery_ctx *d_ctx;
    7027             :         struct nvme_path_id *path_id;
    7028           2 :         struct spdk_nvme_transport_id trid = {};
    7029             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    7030             : 
    7031           2 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    7032             : 
    7033           2 :         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    7034           0 :                 TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
    7035           0 :                         build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
    7036           0 :                         if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
    7037           0 :                                 continue;
    7038             :                         }
    7039             : 
    7040           0 :                         TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
    7041           0 :                         free(entry_ctx);
    7042           0 :                         DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
    7043             :                                           trid.subnqn, trid.traddr, trid.trsvcid);
    7044             : 
    7045             :                         /* Fail discovery ctrlr to force reattach attempt */
    7046           0 :                         spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
    7047           0 :                 }
    7048           0 :         }
    7049           2 : }
    7050             : 
    7051             : static void
    7052           0 : discovery_remove_controllers(struct discovery_ctx *ctx)
    7053             : {
    7054           0 :         struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
    7055             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    7056             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    7057           0 :         struct spdk_nvme_transport_id old_trid = {};
    7058             :         uint64_t numrec, i;
    7059             :         bool found;
    7060             : 
    7061           0 :         numrec = from_le64(&log_page->numrec);
    7062           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
    7063           0 :                 found = false;
    7064           0 :                 old_entry = &entry_ctx->entry;
    7065           0 :                 build_trid_from_log_page_entry(&old_trid, old_entry);
    7066           0 :                 for (i = 0; i < numrec; i++) {
    7067           0 :                         new_entry = &log_page->entries[i];
    7068           0 :                         if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
    7069           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
    7070             :                                                   old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    7071           0 :                                 found = true;
    7072           0 :                                 break;
    7073             :                         }
    7074           0 :                 }
    7075           0 :                 if (!found) {
    7076           0 :                         struct nvme_path_id path = {};
    7077             : 
    7078           0 :                         DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
    7079             :                                           old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    7080             : 
    7081           0 :                         path.trid = entry_ctx->trid;
    7082           0 :                         bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    7083           0 :                         TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    7084           0 :                         free(entry_ctx);
    7085           0 :                 }
    7086           0 :         }
    7087           0 :         free(log_page);
    7088           0 :         ctx->log_page = NULL;
    7089           0 :         discovery_complete(ctx);
    7090           0 : }
    7091             : 
    7092             : static void
    7093           0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
    7094             : {
    7095           0 :         ctx->timeout_ticks = 0;
    7096           0 :         ctx->rc = status;
    7097           0 :         if (ctx->start_cb_fn) {
    7098           0 :                 ctx->start_cb_fn(ctx->cb_ctx, status);
    7099           0 :                 ctx->start_cb_fn = NULL;
    7100           0 :                 ctx->cb_ctx = NULL;
    7101           0 :         }
    7102           0 : }
    7103             : 
    7104             : static void
    7105           0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
    7106             : {
    7107           0 :         struct discovery_entry_ctx *entry_ctx = cb_ctx;
    7108           0 :         struct discovery_ctx *ctx = entry_ctx->ctx;
    7109             : 
    7110           0 :         DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
    7111           0 :         ctx->attach_in_progress--;
    7112           0 :         if (ctx->attach_in_progress == 0) {
    7113           0 :                 complete_discovery_start(ctx, ctx->rc);
    7114           0 :                 if (ctx->initializing && ctx->rc != 0) {
    7115           0 :                         DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
    7116           0 :                         stop_discovery(ctx, NULL, ctx->cb_ctx);
    7117           0 :                 } else {
    7118           0 :                         discovery_remove_controllers(ctx);
    7119             :                 }
    7120           0 :         }
    7121           0 : }
    7122             : 
    7123             : static struct discovery_entry_ctx *
    7124           0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
    7125             : {
    7126             :         struct discovery_entry_ctx *new_ctx;
    7127             : 
    7128           0 :         new_ctx = calloc(1, sizeof(*new_ctx));
    7129           0 :         if (new_ctx == NULL) {
    7130           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7131           0 :                 return NULL;
    7132             :         }
    7133             : 
    7134           0 :         new_ctx->ctx = ctx;
    7135           0 :         memcpy(&new_ctx->trid, trid, sizeof(*trid));
    7136           0 :         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    7137           0 :         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    7138           0 :         return new_ctx;
    7139           0 : }
    7140             : 
    7141             : static void
    7142           0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
    7143             :                       struct spdk_nvmf_discovery_log_page *log_page)
    7144             : {
    7145           0 :         struct discovery_ctx *ctx = cb_arg;
    7146             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    7147             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    7148             :         uint64_t numrec, i;
    7149             :         bool found;
    7150             : 
    7151           0 :         if (rc || spdk_nvme_cpl_is_error(cpl)) {
    7152           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    7153           0 :                 return;
    7154             :         }
    7155             : 
    7156           0 :         ctx->log_page = log_page;
    7157           0 :         assert(ctx->attach_in_progress == 0);
    7158           0 :         numrec = from_le64(&log_page->numrec);
    7159           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
    7160           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    7161           0 :                 free(entry_ctx);
    7162           0 :         }
    7163           0 :         for (i = 0; i < numrec; i++) {
    7164           0 :                 found = false;
    7165           0 :                 new_entry = &log_page->entries[i];
    7166           0 :                 if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
    7167           0 :                     new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
    7168             :                         struct discovery_entry_ctx *new_ctx;
    7169           0 :                         struct spdk_nvme_transport_id trid = {};
    7170             : 
    7171           0 :                         build_trid_from_log_page_entry(&trid, new_entry);
    7172           0 :                         new_ctx = create_discovery_entry_ctx(ctx, &trid);
    7173           0 :                         if (new_ctx == NULL) {
    7174           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7175           0 :                                 break;
    7176             :                         }
    7177             : 
    7178           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
    7179           0 :                         continue;
    7180             :                 }
    7181           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
    7182           0 :                         old_entry = &entry_ctx->entry;
    7183           0 :                         if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
    7184           0 :                                 found = true;
    7185           0 :                                 break;
    7186             :                         }
    7187           0 :                 }
    7188           0 :                 if (!found) {
    7189           0 :                         struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
    7190             :                         struct discovery_ctx *d_ctx;
    7191             : 
    7192           0 :                         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    7193           0 :                                 TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
    7194           0 :                                         if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
    7195             :                                                     sizeof(new_entry->subnqn))) {
    7196           0 :                                                 break;
    7197             :                                         }
    7198           0 :                                 }
    7199           0 :                                 if (subnqn_ctx) {
    7200           0 :                                         break;
    7201             :                                 }
    7202           0 :                         }
    7203             : 
    7204           0 :                         new_ctx = calloc(1, sizeof(*new_ctx));
    7205           0 :                         if (new_ctx == NULL) {
    7206           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7207           0 :                                 break;
    7208             :                         }
    7209             : 
    7210           0 :                         new_ctx->ctx = ctx;
    7211           0 :                         memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
    7212           0 :                         build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
    7213           0 :                         if (subnqn_ctx) {
    7214           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
    7215           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
    7216             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    7217             :                                                   new_ctx->name);
    7218           0 :                         } else {
    7219           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
    7220           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
    7221             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    7222             :                                                   new_ctx->name);
    7223             :                         }
    7224           0 :                         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    7225           0 :                         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    7226           0 :                         rc = spdk_bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
    7227           0 :                                                    discovery_attach_controller_done, new_ctx,
    7228           0 :                                                    &new_ctx->drv_opts, &ctx->bdev_opts);
    7229           0 :                         if (rc == 0) {
    7230           0 :                                 TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
    7231           0 :                                 ctx->attach_in_progress++;
    7232           0 :                         } else {
    7233           0 :                                 DISCOVERY_ERRLOG(ctx, "spdk_bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
    7234             :                         }
    7235           0 :                 }
    7236           0 :         }
    7237             : 
    7238           0 :         if (ctx->attach_in_progress == 0) {
    7239           0 :                 discovery_remove_controllers(ctx);
    7240           0 :         }
    7241           0 : }
    7242             : 
    7243             : static void
    7244           0 : get_discovery_log_page(struct discovery_ctx *ctx)
    7245             : {
    7246             :         int rc;
    7247             : 
    7248           0 :         assert(ctx->in_progress == false);
    7249           0 :         ctx->in_progress = true;
    7250           0 :         rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
    7251           0 :         if (rc != 0) {
    7252           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    7253           0 :         }
    7254           0 :         DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
    7255           0 : }
    7256             : 
    7257             : static void
    7258           0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    7259             : {
    7260           0 :         struct discovery_ctx *ctx = arg;
    7261           0 :         uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
    7262             : 
    7263           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    7264           0 :                 DISCOVERY_ERRLOG(ctx, "aer failed\n");
    7265           0 :                 return;
    7266             :         }
    7267             : 
    7268           0 :         if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
    7269           0 :                 DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
    7270           0 :                 return;
    7271             :         }
    7272             : 
    7273           0 :         DISCOVERY_INFOLOG(ctx, "got aer\n");
    7274           0 :         if (ctx->in_progress) {
    7275           0 :                 ctx->pending = true;
    7276           0 :                 return;
    7277             :         }
    7278             : 
    7279           0 :         get_discovery_log_page(ctx);
    7280           0 : }
    7281             : 
    7282             : static void
    7283           0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    7284             :                     struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    7285             : {
    7286           0 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    7287             :         struct discovery_ctx *ctx;
    7288             : 
    7289           0 :         ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
    7290             : 
    7291           0 :         DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
    7292           0 :         ctx->probe_ctx = NULL;
    7293           0 :         ctx->ctrlr = ctrlr;
    7294             : 
    7295           0 :         if (ctx->rc != 0) {
    7296           0 :                 DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
    7297             :                                  ctx->rc);
    7298           0 :                 return;
    7299             :         }
    7300             : 
    7301           0 :         spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
    7302           0 : }
    7303             : 
    7304             : static int
    7305           0 : discovery_poller(void *arg)
    7306             : {
    7307           0 :         struct discovery_ctx *ctx = arg;
    7308             :         struct spdk_nvme_transport_id *trid;
    7309             :         int rc;
    7310             : 
    7311           0 :         if (ctx->detach_ctx) {
    7312           0 :                 rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
    7313           0 :                 if (rc != -EAGAIN) {
    7314           0 :                         ctx->detach_ctx = NULL;
    7315           0 :                         ctx->ctrlr = NULL;
    7316           0 :                 }
    7317           0 :         } else if (ctx->stop) {
    7318           0 :                 if (ctx->ctrlr != NULL) {
    7319           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    7320           0 :                         if (rc == 0) {
    7321           0 :                                 return SPDK_POLLER_BUSY;
    7322             :                         }
    7323           0 :                         DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    7324           0 :                 }
    7325           0 :                 spdk_poller_unregister(&ctx->poller);
    7326           0 :                 TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    7327           0 :                 assert(ctx->start_cb_fn == NULL);
    7328           0 :                 if (ctx->stop_cb_fn != NULL) {
    7329           0 :                         ctx->stop_cb_fn(ctx->cb_ctx);
    7330           0 :                 }
    7331           0 :                 free_discovery_ctx(ctx);
    7332           0 :         } else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
    7333           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7334           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    7335           0 :                         assert(ctx->initializing);
    7336           0 :                         spdk_poller_unregister(&ctx->poller);
    7337           0 :                         TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    7338           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7339           0 :                         stop_discovery(ctx, NULL, NULL);
    7340           0 :                         free_discovery_ctx(ctx);
    7341           0 :                         return SPDK_POLLER_BUSY;
    7342             :                 }
    7343             : 
    7344           0 :                 assert(ctx->entry_ctx_in_use == NULL);
    7345           0 :                 ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    7346           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7347           0 :                 trid = &ctx->entry_ctx_in_use->trid;
    7348             : 
    7349             :                 /* All controllers must be configured explicitely either for multipath or failover.
    7350             :                  * While discovery use multipath mode, we need to set this in bdev options as well.
    7351             :                  */
    7352           0 :                 ctx->bdev_opts.multipath = true;
    7353             : 
    7354           0 :                 ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
    7355           0 :                 if (ctx->probe_ctx) {
    7356           0 :                         spdk_poller_unregister(&ctx->poller);
    7357           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
    7358           0 :                 } else {
    7359           0 :                         DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
    7360           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7361           0 :                         ctx->entry_ctx_in_use = NULL;
    7362             :                 }
    7363           0 :         } else if (ctx->probe_ctx) {
    7364           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7365           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    7366           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7367           0 :                         return SPDK_POLLER_BUSY;
    7368             :                 }
    7369             : 
    7370           0 :                 rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    7371           0 :                 if (rc != -EAGAIN) {
    7372           0 :                         if (ctx->rc != 0) {
    7373           0 :                                 assert(ctx->initializing);
    7374           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    7375           0 :                         } else {
    7376           0 :                                 assert(rc == 0);
    7377           0 :                                 DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
    7378           0 :                                 ctx->rc = rc;
    7379           0 :                                 get_discovery_log_page(ctx);
    7380             :                         }
    7381           0 :                 }
    7382           0 :         } else {
    7383           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7384           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
    7385           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7386             :                         /* We need to wait until all NVM ctrlrs are attached before we stop the
    7387             :                          * discovery service to make sure we don't detach a ctrlr that is still
    7388             :                          * being attached.
    7389             :                          */
    7390           0 :                         if (ctx->attach_in_progress == 0) {
    7391           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    7392           0 :                                 return SPDK_POLLER_BUSY;
    7393             :                         }
    7394           0 :                 }
    7395             : 
    7396           0 :                 rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
    7397           0 :                 if (rc < 0) {
    7398           0 :                         spdk_poller_unregister(&ctx->poller);
    7399           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    7400           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7401           0 :                         ctx->entry_ctx_in_use = NULL;
    7402             : 
    7403           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    7404           0 :                         if (rc != 0) {
    7405           0 :                                 DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    7406           0 :                                 ctx->ctrlr = NULL;
    7407           0 :                         }
    7408           0 :                 }
    7409             :         }
    7410             : 
    7411           0 :         return SPDK_POLLER_BUSY;
    7412           0 : }
    7413             : 
    7414             : static void
    7415           0 : start_discovery_poller(void *arg)
    7416             : {
    7417           0 :         struct discovery_ctx *ctx = arg;
    7418             : 
    7419           0 :         TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
    7420           0 :         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    7421           0 : }
    7422             : 
    7423             : int
    7424           0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
    7425             :                           const char *base_name,
    7426             :                           struct spdk_nvme_ctrlr_opts *drv_opts,
    7427             :                           struct spdk_bdev_nvme_ctrlr_opts *bdev_opts,
    7428             :                           uint64_t attach_timeout,
    7429             :                           bool from_mdns,
    7430             :                           spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
    7431             : {
    7432             :         struct discovery_ctx *ctx;
    7433             :         struct discovery_entry_ctx *discovery_entry_ctx;
    7434             : 
    7435           0 :         snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
    7436           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7437           0 :                 if (strcmp(ctx->name, base_name) == 0) {
    7438           0 :                         return -EEXIST;
    7439             :                 }
    7440             : 
    7441           0 :                 if (ctx->entry_ctx_in_use != NULL) {
    7442           0 :                         if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
    7443           0 :                                 return -EEXIST;
    7444             :                         }
    7445           0 :                 }
    7446             : 
    7447           0 :                 TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    7448           0 :                         if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
    7449           0 :                                 return -EEXIST;
    7450             :                         }
    7451           0 :                 }
    7452           0 :         }
    7453             : 
    7454           0 :         ctx = calloc(1, sizeof(*ctx));
    7455           0 :         if (ctx == NULL) {
    7456           0 :                 return -ENOMEM;
    7457             :         }
    7458             : 
    7459           0 :         ctx->name = strdup(base_name);
    7460           0 :         if (ctx->name == NULL) {
    7461           0 :                 free_discovery_ctx(ctx);
    7462           0 :                 return -ENOMEM;
    7463             :         }
    7464           0 :         memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    7465           0 :         memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    7466           0 :         ctx->from_mdns_discovery_service = from_mdns;
    7467           0 :         ctx->bdev_opts.from_discovery_service = true;
    7468           0 :         ctx->calling_thread = spdk_get_thread();
    7469           0 :         ctx->start_cb_fn = cb_fn;
    7470           0 :         ctx->cb_ctx = cb_ctx;
    7471           0 :         ctx->initializing = true;
    7472           0 :         if (ctx->start_cb_fn) {
    7473             :                 /* We can use this when dumping json to denote if this RPC parameter
    7474             :                  * was specified or not.
    7475             :                  */
    7476           0 :                 ctx->wait_for_attach = true;
    7477           0 :         }
    7478           0 :         if (attach_timeout != 0) {
    7479           0 :                 ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
    7480           0 :                                      spdk_get_ticks_hz() / 1000ull;
    7481           0 :         }
    7482           0 :         TAILQ_INIT(&ctx->nvm_entry_ctxs);
    7483           0 :         TAILQ_INIT(&ctx->discovery_entry_ctxs);
    7484           0 :         memcpy(&ctx->trid, trid, sizeof(*trid));
    7485             :         /* Even if user did not specify hostnqn, we can still strdup("\0"); */
    7486           0 :         ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
    7487           0 :         if (ctx->hostnqn == NULL) {
    7488           0 :                 free_discovery_ctx(ctx);
    7489           0 :                 return -ENOMEM;
    7490             :         }
    7491           0 :         discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
    7492           0 :         if (discovery_entry_ctx == NULL) {
    7493           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7494           0 :                 free_discovery_ctx(ctx);
    7495           0 :                 return -ENOMEM;
    7496             :         }
    7497             : 
    7498           0 :         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
    7499           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
    7500           0 :         return 0;
    7501           0 : }
    7502             : 
    7503             : int
    7504           0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    7505             : {
    7506             :         struct discovery_ctx *ctx;
    7507             : 
    7508           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7509           0 :                 if (strcmp(name, ctx->name) == 0) {
    7510           0 :                         if (ctx->stop) {
    7511           0 :                                 return -EALREADY;
    7512             :                         }
    7513             :                         /* If we're still starting the discovery service and ->rc is non-zero, we're
    7514             :                          * going to stop it as soon as we can
    7515             :                          */
    7516           0 :                         if (ctx->initializing && ctx->rc != 0) {
    7517           0 :                                 return -EALREADY;
    7518             :                         }
    7519           0 :                         stop_discovery(ctx, cb_fn, cb_ctx);
    7520           0 :                         return 0;
    7521             :                 }
    7522           0 :         }
    7523             : 
    7524           0 :         return -ENOENT;
    7525           0 : }
    7526             : 
    7527             : static int
    7528           1 : bdev_nvme_library_init(void)
    7529             : {
    7530           1 :         g_bdev_nvme_init_thread = spdk_get_thread();
    7531             : 
    7532           1 :         spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
    7533             :                                 bdev_nvme_destroy_poll_group_cb,
    7534             :                                 sizeof(struct nvme_poll_group),  "nvme_poll_groups");
    7535             : 
    7536           1 :         return 0;
    7537             : }
    7538             : 
    7539             : static void
    7540           1 : bdev_nvme_fini_destruct_ctrlrs(void)
    7541             : {
    7542             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    7543             :         struct nvme_ctrlr *nvme_ctrlr;
    7544             : 
    7545           1 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    7546           1 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    7547           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    7548           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    7549           0 :                         if (nvme_ctrlr->destruct) {
    7550             :                                 /* This controller's destruction was already started
    7551             :                                  * before the application started shutting down
    7552             :                                  */
    7553           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7554           0 :                                 continue;
    7555             :                         }
    7556           0 :                         nvme_ctrlr->destruct = true;
    7557           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7558             : 
    7559           0 :                         spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
    7560           0 :                                              nvme_ctrlr);
    7561           0 :                 }
    7562           0 :         }
    7563             : 
    7564           1 :         g_bdev_nvme_module_finish = true;
    7565           1 :         if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    7566           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7567           1 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
    7568           1 :                 spdk_bdev_module_fini_done();
    7569           1 :                 return;
    7570             :         }
    7571             : 
    7572           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7573           1 : }
    7574             : 
    7575             : static void
    7576           0 : check_discovery_fini(void *arg)
    7577             : {
    7578           0 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7579           0 :                 bdev_nvme_fini_destruct_ctrlrs();
    7580           0 :         }
    7581           0 : }
    7582             : 
    7583             : static void
    7584           1 : bdev_nvme_library_fini(void)
    7585             : {
    7586             :         struct nvme_probe_skip_entry *entry, *entry_tmp;
    7587             :         struct discovery_ctx *ctx;
    7588             : 
    7589           1 :         spdk_poller_unregister(&g_hotplug_poller);
    7590           1 :         free(g_hotplug_probe_ctx);
    7591           1 :         g_hotplug_probe_ctx = NULL;
    7592             : 
    7593           1 :         TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
    7594           0 :                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    7595           0 :                 free(entry);
    7596           0 :         }
    7597             : 
    7598           1 :         assert(spdk_get_thread() == g_bdev_nvme_init_thread);
    7599           1 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7600           1 :                 bdev_nvme_fini_destruct_ctrlrs();
    7601           1 :         } else {
    7602           0 :                 TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7603           0 :                         stop_discovery(ctx, check_discovery_fini, NULL);
    7604           0 :                 }
    7605             :         }
    7606           1 : }
    7607             : 
    7608             : static void
    7609           0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
    7610             : {
    7611           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7612           0 :         struct spdk_bdev *bdev = bdev_io->bdev;
    7613             :         struct spdk_dif_ctx dif_ctx;
    7614           0 :         struct spdk_dif_error err_blk = {};
    7615             :         int rc;
    7616             :         struct spdk_dif_ctx_init_ext_opts dif_opts;
    7617             : 
    7618           0 :         dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
    7619           0 :         dif_opts.dif_pi_format = bdev->dif_pi_format;
    7620           0 :         rc = spdk_dif_ctx_init(&dif_ctx,
    7621           0 :                                bdev->blocklen, bdev->md_len, bdev->md_interleave,
    7622           0 :                                bdev->dif_is_head_of_md, bdev->dif_type,
    7623           0 :                                bdev_io->u.bdev.dif_check_flags,
    7624           0 :                                bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
    7625           0 :         if (rc != 0) {
    7626           0 :                 SPDK_ERRLOG("Initialization of DIF context failed\n");
    7627           0 :                 return;
    7628             :         }
    7629             : 
    7630           0 :         if (bdev->md_interleave) {
    7631           0 :                 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7632           0 :                                      bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7633           0 :         } else {
    7634           0 :                 struct iovec md_iov = {
    7635           0 :                         .iov_base       = bdev_io->u.bdev.md_buf,
    7636           0 :                         .iov_len        = bdev_io->u.bdev.num_blocks * bdev->md_len,
    7637             :                 };
    7638             : 
    7639           0 :                 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7640           0 :                                      &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7641             :         }
    7642             : 
    7643           0 :         if (rc != 0) {
    7644           0 :                 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
    7645             :                             err_blk.err_type, err_blk.err_offset);
    7646           0 :         } else {
    7647           0 :                 SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
    7648             :         }
    7649           0 : }
    7650             : 
    7651             : static void
    7652           0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7653             : {
    7654           0 :         struct nvme_bdev_io *bio = ref;
    7655             : 
    7656           0 :         if (spdk_nvme_cpl_is_success(cpl)) {
    7657             :                 /* Run PI verification for read data buffer. */
    7658           0 :                 bdev_nvme_verify_pi_error(bio);
    7659           0 :         }
    7660             : 
    7661             :         /* Return original completion status */
    7662           0 :         bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    7663           0 : }
    7664             : 
    7665             : static void
    7666           3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7667             : {
    7668           3 :         struct nvme_bdev_io *bio = ref;
    7669           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7670             :         int ret;
    7671             : 
    7672           3 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7673           0 :                 SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
    7674             :                             cpl->status.sct, cpl->status.sc);
    7675             : 
    7676             :                 /* Save completion status to use after verifying PI error. */
    7677           0 :                 bio->cpl = *cpl;
    7678             : 
    7679           0 :                 if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
    7680             :                         /* Read without PI checking to verify PI error. */
    7681           0 :                         ret = bdev_nvme_no_pi_readv(bio,
    7682           0 :                                                     bdev_io->u.bdev.iovs,
    7683           0 :                                                     bdev_io->u.bdev.iovcnt,
    7684           0 :                                                     bdev_io->u.bdev.md_buf,
    7685           0 :                                                     bdev_io->u.bdev.num_blocks,
    7686           0 :                                                     bdev_io->u.bdev.offset_blocks);
    7687           0 :                         if (ret == 0) {
    7688           0 :                                 return;
    7689             :                         }
    7690           0 :                 }
    7691           0 :         }
    7692             : 
    7693           3 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7694           3 : }
    7695             : 
    7696             : static void
    7697          25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7698             : {
    7699          25 :         struct nvme_bdev_io *bio = ref;
    7700             : 
    7701          25 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7702           0 :                 SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
    7703             :                             cpl->status.sct, cpl->status.sc);
    7704             :                 /* Run PI verification for write data buffer if PI error is detected. */
    7705           0 :                 bdev_nvme_verify_pi_error(bio);
    7706           0 :         }
    7707             : 
    7708          25 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7709          25 : }
    7710             : 
    7711             : static void
    7712           0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7713             : {
    7714           0 :         struct nvme_bdev_io *bio = ref;
    7715           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7716             : 
    7717             :         /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
    7718             :          * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
    7719             :          */
    7720           0 :         bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
    7721             : 
    7722           0 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7723           0 :                 SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
    7724             :                             cpl->status.sct, cpl->status.sc);
    7725             :                 /* Run PI verification for zone append data buffer if PI error is detected. */
    7726           0 :                 bdev_nvme_verify_pi_error(bio);
    7727           0 :         }
    7728             : 
    7729           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7730           0 : }
    7731             : 
    7732             : static void
    7733           1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7734             : {
    7735           1 :         struct nvme_bdev_io *bio = ref;
    7736             : 
    7737           1 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7738           0 :                 SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
    7739             :                             cpl->status.sct, cpl->status.sc);
    7740             :                 /* Run PI verification for compare data buffer if PI error is detected. */
    7741           0 :                 bdev_nvme_verify_pi_error(bio);
    7742           0 :         }
    7743             : 
    7744           1 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7745           1 : }
    7746             : 
    7747             : static void
    7748           4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7749             : {
    7750           4 :         struct nvme_bdev_io *bio = ref;
    7751             : 
    7752             :         /* Compare operation completion */
    7753           4 :         if (!bio->first_fused_completed) {
    7754             :                 /* Save compare result for write callback */
    7755           2 :                 bio->cpl = *cpl;
    7756           2 :                 bio->first_fused_completed = true;
    7757           2 :                 return;
    7758             :         }
    7759             : 
    7760             :         /* Write operation completion */
    7761           2 :         if (spdk_nvme_cpl_is_error(&bio->cpl)) {
    7762             :                 /* If bio->cpl is already an error, it means the compare operation failed.  In that case,
    7763             :                  * complete the IO with the compare operation's status.
    7764             :                  */
    7765           1 :                 if (!spdk_nvme_cpl_is_error(cpl)) {
    7766           1 :                         SPDK_ERRLOG("Unexpected write success after compare failure.\n");
    7767           1 :                 }
    7768             : 
    7769           1 :                 bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    7770           1 :         } else {
    7771           1 :                 bdev_nvme_io_complete_nvme_status(bio, cpl);
    7772             :         }
    7773           4 : }
    7774             : 
    /*
     * Generic completion callback: forwards the NVMe completion unchanged to
     * bdev_nvme_io_complete_nvme_status() for the bio passed as callback argument.
     */
    static void
    bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
    {
            struct nvme_bdev_io *io_ctx = ref;

            bdev_nvme_io_complete_nvme_status(io_ctx, cpl);
    }
    7782             : 
    7783             : static int
    7784           0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
    7785             : {
    7786           0 :         switch (desc->zt) {
    7787             :         case SPDK_NVME_ZONE_TYPE_SEQWR:
    7788           0 :                 info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
    7789           0 :                 break;
    7790             :         default:
    7791           0 :                 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
    7792           0 :                 return -EIO;
    7793             :         }
    7794             : 
    7795           0 :         switch (desc->zs) {
    7796             :         case SPDK_NVME_ZONE_STATE_EMPTY:
    7797           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
    7798           0 :                 break;
    7799             :         case SPDK_NVME_ZONE_STATE_IOPEN:
    7800           0 :                 info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
    7801           0 :                 break;
    7802             :         case SPDK_NVME_ZONE_STATE_EOPEN:
    7803           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
    7804           0 :                 break;
    7805             :         case SPDK_NVME_ZONE_STATE_CLOSED:
    7806           0 :                 info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
    7807           0 :                 break;
    7808             :         case SPDK_NVME_ZONE_STATE_RONLY:
    7809           0 :                 info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
    7810           0 :                 break;
    7811             :         case SPDK_NVME_ZONE_STATE_FULL:
    7812           0 :                 info->state = SPDK_BDEV_ZONE_STATE_FULL;
    7813           0 :                 break;
    7814             :         case SPDK_NVME_ZONE_STATE_OFFLINE:
    7815           0 :                 info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
    7816           0 :                 break;
    7817             :         default:
    7818           0 :                 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
    7819           0 :                 return -EIO;
    7820             :         }
    7821             : 
    7822           0 :         info->zone_id = desc->zslba;
    7823           0 :         info->write_pointer = desc->wp;
    7824           0 :         info->capacity = desc->zcap;
    7825             : 
    7826           0 :         return 0;
    7827           0 : }
    7828             : 
    7829             : static void
    7830           0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7831             : {
    7832           0 :         struct nvme_bdev_io *bio = ref;
    7833           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7834           0 :         uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
    7835           0 :         uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
    7836           0 :         struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
    7837             :         uint64_t max_zones_per_buf, i;
    7838             :         uint32_t zone_report_bufsize;
    7839             :         struct spdk_nvme_ns *ns;
    7840             :         struct spdk_nvme_qpair *qpair;
    7841             :         int ret;
    7842             : 
    7843           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    7844           0 :                 goto out_complete_io_nvme_cpl;
    7845             :         }
    7846             : 
    7847           0 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    7848           0 :                 ret = -ENXIO;
    7849           0 :                 goto out_complete_io_ret;
    7850             :         }
    7851             : 
    7852           0 :         ns = bio->io_path->nvme_ns->ns;
    7853           0 :         qpair = bio->io_path->qpair->qpair;
    7854             : 
    7855           0 :         zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    7856           0 :         max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
    7857             :                             sizeof(bio->zone_report_buf->descs[0]);
    7858             : 
    7859           0 :         if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
    7860           0 :                 ret = -EINVAL;
    7861           0 :                 goto out_complete_io_ret;
    7862             :         }
    7863             : 
    7864           0 :         if (!bio->zone_report_buf->nr_zones) {
    7865           0 :                 ret = -EINVAL;
    7866           0 :                 goto out_complete_io_ret;
    7867             :         }
    7868             : 
    7869           0 :         for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
    7870           0 :                 ret = fill_zone_from_report(&info[bio->handled_zones],
    7871           0 :                                             &bio->zone_report_buf->descs[i]);
    7872           0 :                 if (ret) {
    7873           0 :                         goto out_complete_io_ret;
    7874             :                 }
    7875           0 :                 bio->handled_zones++;
    7876           0 :         }
    7877             : 
    7878           0 :         if (bio->handled_zones < zones_to_copy) {
    7879           0 :                 uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    7880           0 :                 uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
    7881             : 
    7882           0 :                 memset(bio->zone_report_buf, 0, zone_report_bufsize);
    7883           0 :                 ret = spdk_nvme_zns_report_zones(ns, qpair,
    7884           0 :                                                  bio->zone_report_buf, zone_report_bufsize,
    7885           0 :                                                  slba, SPDK_NVME_ZRA_LIST_ALL, true,
    7886           0 :                                                  bdev_nvme_get_zone_info_done, bio);
    7887           0 :                 if (!ret) {
    7888           0 :                         return;
    7889             :                 } else {
    7890           0 :                         goto out_complete_io_ret;
    7891             :                 }
    7892             :         }
    7893             : 
    7894             : out_complete_io_nvme_cpl:
    7895           0 :         free(bio->zone_report_buf);
    7896           0 :         bio->zone_report_buf = NULL;
    7897           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7898           0 :         return;
    7899             : 
    7900             : out_complete_io_ret:
    7901           0 :         free(bio->zone_report_buf);
    7902           0 :         bio->zone_report_buf = NULL;
    7903           0 :         bdev_nvme_io_complete(bio, ret);
    7904           0 : }
    7905             : 
    /*
     * Completion callback for zone management commands: forwards the NVMe
     * completion unchanged to bdev_nvme_io_complete_nvme_status().
     */
    static void
    bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
    {
            struct nvme_bdev_io *io_ctx = ref;

            bdev_nvme_io_complete_nvme_status(io_ctx, cpl);
    }
    7913             : 
    7914             : static void
    7915           4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
    7916             : {
    7917           4 :         struct nvme_bdev_io *bio = ctx;
    7918           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7919           4 :         const struct spdk_nvme_cpl *cpl = &bio->cpl;
    7920             : 
    7921           4 :         assert(bdev_nvme_io_type_is_admin(bdev_io->type));
    7922             : 
    7923           4 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    7924           4 : }
    7925             : 
    7926             : static void
    7927           3 : bdev_nvme_abort_complete(void *ctx)
    7928             : {
    7929           3 :         struct nvme_bdev_io *bio = ctx;
    7930           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7931             : 
    7932           3 :         if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
    7933           3 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
    7934           3 :         } else {
    7935           0 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
    7936             :         }
    7937           3 : }
    7938             : 
    7939             : static void
    7940           3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7941             : {
    7942           3 :         struct nvme_bdev_io *bio = ref;
    7943           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7944             : 
    7945           3 :         bio->cpl = *cpl;
    7946           3 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
    7947           3 : }
    7948             : 
    7949             : static void
    7950           4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7951             : {
    7952           4 :         struct nvme_bdev_io *bio = ref;
    7953           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7954             : 
    7955           4 :         bio->cpl = *cpl;
    7956           8 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
    7957           4 :                              bdev_nvme_admin_passthru_complete_nvme_status, bio);
    7958           4 : }
    7959             : 
    7960             : static void
    7961           0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
    7962             : {
    7963           0 :         struct nvme_bdev_io *bio = ref;
    7964             :         struct iovec *iov;
    7965             : 
    7966           0 :         bio->iov_offset = sgl_offset;
    7967           0 :         for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
    7968           0 :                 iov = &bio->iovs[bio->iovpos];
    7969           0 :                 if (bio->iov_offset < iov->iov_len) {
    7970           0 :                         break;
    7971             :                 }
    7972             : 
    7973           0 :                 bio->iov_offset -= iov->iov_len;
    7974           0 :         }
    7975           0 : }
    7976             : 
    7977             : static int
    7978           0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
    7979             : {
    7980           0 :         struct nvme_bdev_io *bio = ref;
    7981             :         struct iovec *iov;
    7982             : 
    7983           0 :         assert(bio->iovpos < bio->iovcnt);
    7984             : 
    7985           0 :         iov = &bio->iovs[bio->iovpos];
    7986             : 
    7987           0 :         *address = iov->iov_base;
    7988           0 :         *length = iov->iov_len;
    7989             : 
    7990           0 :         if (bio->iov_offset) {
    7991           0 :                 assert(bio->iov_offset <= iov->iov_len);
    7992           0 :                 *address += bio->iov_offset;
    7993           0 :                 *length -= bio->iov_offset;
    7994           0 :         }
    7995             : 
    7996           0 :         bio->iov_offset += *length;
    7997           0 :         if (bio->iov_offset == iov->iov_len) {
    7998           0 :                 bio->iovpos++;
    7999           0 :                 bio->iov_offset = 0;
    8000           0 :         }
    8001             : 
    8002           0 :         return 0;
    8003             : }
    8004             : 
    8005             : static void
    8006           0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
    8007             : {
    8008           0 :         struct nvme_bdev_io *bio = ref;
    8009             :         struct iovec *iov;
    8010             : 
    8011           0 :         bio->fused_iov_offset = sgl_offset;
    8012           0 :         for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
    8013           0 :                 iov = &bio->fused_iovs[bio->fused_iovpos];
    8014           0 :                 if (bio->fused_iov_offset < iov->iov_len) {
    8015           0 :                         break;
    8016             :                 }
    8017             : 
    8018           0 :                 bio->fused_iov_offset -= iov->iov_len;
    8019           0 :         }
    8020           0 : }
    8021             : 
    8022             : static int
    8023           0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
    8024             : {
    8025           0 :         struct nvme_bdev_io *bio = ref;
    8026             :         struct iovec *iov;
    8027             : 
    8028           0 :         assert(bio->fused_iovpos < bio->fused_iovcnt);
    8029             : 
    8030           0 :         iov = &bio->fused_iovs[bio->fused_iovpos];
    8031             : 
    8032           0 :         *address = iov->iov_base;
    8033           0 :         *length = iov->iov_len;
    8034             : 
    8035           0 :         if (bio->fused_iov_offset) {
    8036           0 :                 assert(bio->fused_iov_offset <= iov->iov_len);
    8037           0 :                 *address += bio->fused_iov_offset;
    8038           0 :                 *length -= bio->fused_iov_offset;
    8039           0 :         }
    8040             : 
    8041           0 :         bio->fused_iov_offset += *length;
    8042           0 :         if (bio->fused_iov_offset == iov->iov_len) {
    8043           0 :                 bio->fused_iovpos++;
    8044           0 :                 bio->fused_iov_offset = 0;
    8045           0 :         }
    8046             : 
    8047           0 :         return 0;
    8048             : }
    8049             : 
    8050             : static int
    8051           0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8052             :                       void *md, uint64_t lba_count, uint64_t lba)
    8053             : {
    8054             :         int rc;
    8055             : 
    8056           0 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
    8057             :                       lba_count, lba);
    8058             : 
    8059           0 :         bio->iovs = iov;
    8060           0 :         bio->iovcnt = iovcnt;
    8061           0 :         bio->iovpos = 0;
    8062           0 :         bio->iov_offset = 0;
    8063             : 
    8064           0 :         rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
    8065           0 :                                             bio->io_path->qpair->qpair,
    8066           0 :                                             lba, lba_count,
    8067           0 :                                             bdev_nvme_no_pi_readv_done, bio, 0,
    8068             :                                             bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    8069           0 :                                             md, 0, 0);
    8070             : 
    8071           0 :         if (rc != 0 && rc != -ENOMEM) {
    8072           0 :                 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
    8073           0 :         }
    8074           0 :         return rc;
    8075             : }
    8076             : 
    8077             : static int
    8078           3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8079             :                 void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    8080             :                 struct spdk_memory_domain *domain, void *domain_ctx,
    8081             :                 struct spdk_accel_sequence *seq)
    8082             : {
    8083           3 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8084           3 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8085             :         int rc;
    8086             : 
    8087           3 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8088             :                       lba_count, lba);
    8089             : 
    8090           3 :         bio->iovs = iov;
    8091           3 :         bio->iovcnt = iovcnt;
    8092           3 :         bio->iovpos = 0;
    8093           3 :         bio->iov_offset = 0;
    8094             : 
    8095           3 :         if (domain != NULL || seq != NULL) {
    8096           1 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    8097           1 :                 bio->ext_opts.memory_domain = domain;
    8098           1 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    8099           1 :                 bio->ext_opts.io_flags = flags;
    8100           1 :                 bio->ext_opts.metadata = md;
    8101           1 :                 bio->ext_opts.accel_sequence = seq;
    8102             : 
    8103           1 :                 if (iovcnt == 1) {
    8104           2 :                         rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
    8105           1 :                                                        bio, &bio->ext_opts);
    8106           1 :                 } else {
    8107           0 :                         rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
    8108           0 :                                                         bdev_nvme_readv_done, bio,
    8109             :                                                         bdev_nvme_queued_reset_sgl,
    8110             :                                                         bdev_nvme_queued_next_sge,
    8111           0 :                                                         &bio->ext_opts);
    8112             :                 }
    8113           3 :         } else if (iovcnt == 1) {
    8114           4 :                 rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
    8115           2 :                                                    md, lba, lba_count, bdev_nvme_readv_done,
    8116           2 :                                                    bio, flags, 0, 0);
    8117           2 :         } else {
    8118           0 :                 rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
    8119           0 :                                                     bdev_nvme_readv_done, bio, flags,
    8120             :                                                     bdev_nvme_queued_reset_sgl,
    8121           0 :                                                     bdev_nvme_queued_next_sge, md, 0, 0);
    8122             :         }
    8123             : 
    8124           3 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    8125           0 :                 SPDK_ERRLOG("readv failed: rc = %d\n", rc);
    8126           0 :         }
    8127           3 :         return rc;
    8128             : }
    8129             : 
    8130             : static int
    8131          25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8132             :                  void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    8133             :                  struct spdk_memory_domain *domain, void *domain_ctx,
    8134             :                  struct spdk_accel_sequence *seq,
    8135             :                  union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13)
    8136             : {
    8137          25 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8138          25 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8139             :         int rc;
    8140             : 
    8141          25 :         SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8142             :                       lba_count, lba);
    8143             : 
    8144          25 :         bio->iovs = iov;
    8145          25 :         bio->iovcnt = iovcnt;
    8146          25 :         bio->iovpos = 0;
    8147          25 :         bio->iov_offset = 0;
    8148             : 
    8149          25 :         if (domain != NULL || seq != NULL) {
    8150           0 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    8151           0 :                 bio->ext_opts.memory_domain = domain;
    8152           0 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    8153           0 :                 bio->ext_opts.io_flags = flags | SPDK_NVME_IO_FLAGS_DIRECTIVE(cdw12.write.dtype);
    8154           0 :                 bio->ext_opts.cdw13 = cdw13.raw;
    8155           0 :                 bio->ext_opts.metadata = md;
    8156           0 :                 bio->ext_opts.accel_sequence = seq;
    8157             : 
    8158           0 :                 if (iovcnt == 1) {
    8159           0 :                         rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
    8160           0 :                                                         bio, &bio->ext_opts);
    8161           0 :                 } else {
    8162           0 :                         rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
    8163           0 :                                                          bdev_nvme_writev_done, bio,
    8164             :                                                          bdev_nvme_queued_reset_sgl,
    8165             :                                                          bdev_nvme_queued_next_sge,
    8166           0 :                                                          &bio->ext_opts);
    8167             :                 }
    8168          25 :         } else if (iovcnt == 1) {
    8169          50 :                 rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
    8170          25 :                                                     md, lba, lba_count, bdev_nvme_writev_done,
    8171          25 :                                                     bio, flags, 0, 0);
    8172          25 :         } else {
    8173           0 :                 rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    8174           0 :                                                      bdev_nvme_writev_done, bio, flags,
    8175             :                                                      bdev_nvme_queued_reset_sgl,
    8176           0 :                                                      bdev_nvme_queued_next_sge, md, 0, 0);
    8177             :         }
    8178             : 
    8179          25 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    8180           0 :                 SPDK_ERRLOG("writev failed: rc = %d\n", rc);
    8181           0 :         }
    8182          25 :         return rc;
    8183             : }
    8184             : 
    8185             : static int
    8186           0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8187             :                        void *md, uint64_t lba_count, uint64_t zslba,
    8188             :                        uint32_t flags)
    8189             : {
                               /* Submit a ZNS zone append of lba_count blocks to the zone whose start
                                * LBA is zslba, using the namespace and qpair of bio's current I/O path.
                                *
                                * Returns the rc of the spdk_nvme_zns_* submission call unchanged. Any
                                * non-zero rc other than -ENOMEM is additionally logged as an error.
                                */
    8190           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8191           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8192             :         int rc;
    8193             : 
    8194           0 :         SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
    8195             :                       lba_count, zslba);
    8196             : 
                               /* Initialize the iovec cursor kept in bio. The vectored submission below
                                * is handed bdev_nvme_queued_reset_sgl/bdev_nvme_queued_next_sge, which
                                * presumably walk this cursor -- confirm against their definitions.
                                */
    8197           0 :         bio->iovs = iov;
    8198           0 :         bio->iovcnt = iovcnt;
    8199           0 :         bio->iovpos = 0;
    8200           0 :         bio->iov_offset = 0;
    8201             : 
                               /* A single iovec is submitted through the contiguous-buffer variant using
                                * iov[0].iov_base; anything else goes through the vectored variant.
                                */
    8202           0 :         if (iovcnt == 1) {
    8203           0 :                 rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
    8204           0 :                                                        lba_count,
    8205           0 :                                                        bdev_nvme_zone_appendv_done, bio,
    8206           0 :                                                        flags,
    8207             :                                                        0, 0);
    8208           0 :         } else {
    8209           0 :                 rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
    8210           0 :                                                         bdev_nvme_zone_appendv_done, bio, flags,
    8211             :                                                         bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    8212           0 :                                                         md, 0, 0);
    8213             :         }
    8214             : 
                               /* -ENOMEM is deliberately not logged here; it is still returned to the caller. */
    8215           0 :         if (rc != 0 && rc != -ENOMEM) {
    8216           0 :                 SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
    8217           0 :         }
    8218           0 :         return rc;
    8219             : }
    8220             : 
    8221             : static int
    8222           1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8223             :                    void *md, uint64_t lba_count, uint64_t lba,
    8224             :                    uint32_t flags)
    8225             : {
                               /* Submit a vectored compare of lba_count blocks starting at lba on the
                                * namespace and qpair of bio's current I/O path. Completion is delivered
                                * to bdev_nvme_comparev_done with bio as its argument.
                                *
                                * Returns the rc of spdk_nvme_ns_cmd_comparev_with_md() unchanged. Any
                                * non-zero rc other than -ENOMEM is additionally logged as an error.
                                */
    8226             :         int rc;
    8227             : 
    8228           1 :         SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8229             :                       lba_count, lba);
    8230             : 
                               /* Initialize the iovec cursor kept in bio before handing the SGL
                                * callbacks to the driver.
                                */
    8231           1 :         bio->iovs = iov;
    8232           1 :         bio->iovcnt = iovcnt;
    8233           1 :         bio->iovpos = 0;
    8234           1 :         bio->iov_offset = 0;
    8235             : 
    8236           2 :         rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
    8237           1 :                                                bio->io_path->qpair->qpair,
    8238           1 :                                                lba, lba_count,
    8239           1 :                                                bdev_nvme_comparev_done, bio, flags,
    8240             :                                                bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    8241           1 :                                                md, 0, 0);
    8242             : 
    8243           1 :         if (rc != 0 && rc != -ENOMEM) {
    8244           0 :                 SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
    8245           0 :         }
    8246           1 :         return rc;
    8247             : }
    8248             : 
    8249             : static int
    8250           2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
    8251             :                               struct iovec *write_iov, int write_iovcnt,
    8252             :                               void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
    8253             : {
                               /* Submit a fused compare-and-write: a compare flagged
                                * SPDK_NVME_IO_FLAGS_FUSE_FIRST followed by a write flagged
                                * SPDK_NVME_IO_FLAGS_FUSE_SECOND, both over the same lba/lba_count and
                                * both completing through bdev_nvme_comparev_and_writev_done.
                                *
                                * Return value:
                                *  - the compare's rc if submitting the compare fails (nothing else is
                                *    submitted in that case);
                                *  - -ENOMEM if submitting the write returns -ENOMEM;
                                *  - 0 otherwise, including when the write fails with any other error
                                *    (see the note at the end of the function).
                                */
    8254           2 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8255           2 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8256           2 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    8257             :         int rc;
    8258             : 
    8259           2 :         SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8260             :                       lba_count, lba);
    8261             : 
                               /* Two independent iovec cursors are kept in bio: the plain one for the
                                * compare payload and the fused_* one for the write payload.
                                */
    8262           2 :         bio->iovs = cmp_iov;
    8263           2 :         bio->iovcnt = cmp_iovcnt;
    8264           2 :         bio->iovpos = 0;
    8265           2 :         bio->iov_offset = 0;
    8266           2 :         bio->fused_iovs = write_iov;
    8267           2 :         bio->fused_iovcnt = write_iovcnt;
    8268           2 :         bio->fused_iovpos = 0;
    8269           2 :         bio->fused_iov_offset = 0;
    8270             : 
                               /* Progress flags are cleared only when num_retries is 0, so on a retry
                                * the flags set by an earlier call are kept.
                                */
    8271           2 :         if (bdev_io->num_retries == 0) {
    8272           2 :                 bio->first_fused_submitted = false;
    8273           2 :                 bio->first_fused_completed = false;
    8274           2 :         }
    8275             : 
                               /* Skip the compare if an earlier call already submitted it; only the
                                * write half is (re)submitted in that case.
                                */
    8276           2 :         if (!bio->first_fused_submitted) {
    8277           2 :                 flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    8278           2 :                 memset(&bio->cpl, 0, sizeof(bio->cpl));
    8279             : 
    8280           4 :                 rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
    8281           2 :                                                        bdev_nvme_comparev_and_writev_done, bio, flags,
    8282           2 :                                                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
    8283           2 :                 if (rc == 0) {
    8284           2 :                         bio->first_fused_submitted = true;
                                               /* The same flags variable is reused for the write below. */
    8285           2 :                         flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    8286           2 :                 } else {
    8287           0 :                         if (rc != -ENOMEM) {
    8288           0 :                                 SPDK_ERRLOG("compare failed: rc = %d\n", rc);
    8289           0 :                         }
    8290           0 :                         return rc;
    8291             :                 }
    8292           2 :         }
    8293             : 
    8294           2 :         flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
    8295             : 
    8296           4 :         rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    8297           2 :                                              bdev_nvme_comparev_and_writev_done, bio, flags,
    8298           2 :                                              bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
                               /* A write submission error other than -ENOMEM is logged and then reported
                                * to the caller as success (rc = 0).
                                * NOTE(review): presumably this is because the compare is already in
                                * flight and its completion is expected to fail the I/O -- confirm
                                * against bdev_nvme_comparev_and_writev_done.
                                */
    8299           2 :         if (rc != 0 && rc != -ENOMEM) {
    8300           0 :                 SPDK_ERRLOG("write failed: rc = %d\n", rc);
    8301           0 :                 rc = 0;
    8302           0 :         }
    8303             : 
    8304           2 :         return rc;
    8305           2 : }
    8306             : 
    8307             : static int
    8308           1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    8309             : {
                               /* Deallocate num_blocks blocks starting at offset_blocks with a single
                                * Dataset Management command (SPDK_NVME_DSM_ATTR_DEALLOCATE).
                                *
                                * The request is split into consecutive ranges of at most
                                * SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS blocks each, held in a
                                * stack array of SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES entries.
                                *
                                * Returns -EINVAL if more ranges would be needed than the array holds,
                                * otherwise the rc of spdk_nvme_ns_cmd_dataset_management().
                                */
    8310             :         struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
    8311             :         struct spdk_nvme_dsm_range *range;
    8312             :         uint64_t offset, remaining;
    8313             :         uint64_t num_ranges_u64;
    8314             :         uint16_t num_ranges;
    8315             :         int rc;
    8316             : 
                               /* Ceiling division: number of ranges needed to cover num_blocks. It is
                                * computed in 64 bits and bounds-checked before narrowing to uint16_t.
                                * NOTE(review): num_blocks == 0 yields num_ranges == 0 while one
                                * zero-length range is still filled in below -- confirm callers never
                                * pass 0.
                                */
    8317           1 :         num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
    8318             :                          SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8319           1 :         if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
    8320           0 :                 SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
    8321           0 :                 return -EINVAL;
    8322             :         }
    8323           1 :         num_ranges = (uint16_t)num_ranges_u64;
    8324             : 
    8325           1 :         offset = offset_blocks;
    8326           1 :         remaining = num_blocks;
    8327           1 :         range = &dsm_ranges[0];
    8328             : 
    8329             :         /* Fill max-size ranges until the remaining blocks fit into one range */
    8330           1 :         while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
    8331           0 :                 range->attributes.raw = 0;
    8332           0 :                 range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8333           0 :                 range->starting_lba = offset;
    8334             : 
    8335           0 :                 offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8336           0 :                 remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8337           0 :                 range++;
    8338             :         }
    8339             : 
    8340             :         /* Final range describes the remaining blocks */
    8341           1 :         range->attributes.raw = 0;
    8342           1 :         range->length = remaining;
    8343           1 :         range->starting_lba = offset;
    8344             : 
                               /* Completion is reported through bdev_nvme_queued_done with bio as its
                                * argument.
                                */
    8345           2 :         rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
    8346           1 :                         bio->io_path->qpair->qpair,
    8347             :                         SPDK_NVME_DSM_ATTR_DEALLOCATE,
    8348           1 :                         dsm_ranges, num_ranges,
    8349           1 :                         bdev_nvme_queued_done, bio);
    8350             : 
    8351           1 :         return rc;
    8352           1 : }
    8353             : 
    8354             : static int
    8355           0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    8356             : {
                               /* Submit a Write Zeroes command for num_blocks blocks starting at
                                * offset_blocks on the namespace and qpair of bio's current I/O path.
                                * Completion is reported through bdev_nvme_queued_done.
                                *
                                * Returns -EINVAL if num_blocks exceeds UINT16_MAX + 1 (65536), otherwise
                                * the rc of spdk_nvme_ns_cmd_write_zeroes(). The last argument (0) is the
                                * io_flags value passed to the driver.
                                */
    8357           0 :         if (num_blocks > UINT16_MAX + 1) {
    8358           0 :                 SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
    8359           0 :                 return -EINVAL;
    8360             :         }
    8361             : 
    8362           0 :         return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
    8363           0 :                                              bio->io_path->qpair->qpair,
    8364           0 :                                              offset_blocks, num_blocks,
    8365           0 :                                              bdev_nvme_queued_done, bio,
    8366             :                                              0);
    8367           0 : }
    8368             : 
    8369             : static int
    8370           0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
    8371             :                         struct spdk_bdev_zone_info *info)
    8372             : {
                               /* Start a ZNS zone report beginning at zone_id. A zeroed report buffer
                                * of the namespace's max I/O transfer size is allocated into
                                * bio->zone_report_buf and the report is completed through
                                * bdev_nvme_get_zone_info_done.
                                *
                                * num_zones and info are not passed to the driver in this function;
                                * num_zones is only range-checked and info is not referenced at all.
                                * Presumably the completion callback consumes them from the bdev_io --
                                * TODO confirm.
                                *
                                * Returns -EINVAL if zone_id is not a multiple of the zone size or if
                                * num_zones is 0 or exceeds the namespace's zone count, -ENOMEM if the
                                * buffer cannot be allocated, otherwise the rc of
                                * spdk_nvme_zns_report_zones().
                                */
    8373           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8374           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8375           0 :         uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8376           0 :         uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    8377           0 :         uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
    8378             : 
                               /* zone_id must be aligned to a zone boundary. */
    8379           0 :         if (zone_id % zone_size != 0) {
    8380           0 :                 return -EINVAL;
    8381             :         }
    8382             : 
    8383           0 :         if (num_zones > total_zones || !num_zones) {
    8384           0 :                 return -EINVAL;
    8385             :         }
    8386             : 
                               /* NOTE(review): the buffer is not released in this function when
                                * spdk_nvme_zns_report_zones() returns non-zero. Confirm the caller frees
                                * it on submission failure; otherwise it leaks and this assert would trip
                                * if the same bio is submitted again.
                                */
    8387           0 :         assert(!bio->zone_report_buf);
    8388           0 :         bio->zone_report_buf = calloc(1, zone_report_bufsize);
    8389           0 :         if (!bio->zone_report_buf) {
    8390           0 :                 return -ENOMEM;
    8391             :         }
    8392             : 
    8393           0 :         bio->handled_zones = 0;
    8394             : 
    8395           0 :         return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
    8396           0 :                                           zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
    8397           0 :                                           bdev_nvme_get_zone_info_done, bio);
    8398           0 : }
    8399             : 
    8400             : static int
    8401           0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
    8402             :                           enum spdk_bdev_zone_action action)
    8403             : {
                               /* Translate a bdev zone action into the matching spdk_nvme_zns_*_zone()
                                * call for the zone identified by zone_id, on the namespace and qpair of
                                * bio's current I/O path. Every variant completes through
                                * bdev_nvme_zone_management_done with bio as its argument.
                                *
                                * The `false` argument is passed unchanged to every call; in the SPDK ZNS
                                * API this position is the select_all flag -- TODO confirm against
                                * spdk/nvme_zns.h.
                                *
                                * Returns the rc of the driver call, or -EINVAL for an action not listed
                                * below.
                                */
    8404           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8405           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8406             : 
    8407           0 :         switch (action) {
    8408             :         case SPDK_BDEV_ZONE_CLOSE:
    8409           0 :                 return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
    8410           0 :                                                 bdev_nvme_zone_management_done, bio);
    8411             :         case SPDK_BDEV_ZONE_FINISH:
    8412           0 :                 return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
    8413           0 :                                                  bdev_nvme_zone_management_done, bio);
    8414             :         case SPDK_BDEV_ZONE_OPEN:
    8415           0 :                 return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
    8416           0 :                                                bdev_nvme_zone_management_done, bio);
    8417             :         case SPDK_BDEV_ZONE_RESET:
    8418           0 :                 return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
    8419           0 :                                                 bdev_nvme_zone_management_done, bio);
    8420             :         case SPDK_BDEV_ZONE_OFFLINE:
    8421           0 :                 return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
    8422           0 :                                                   bdev_nvme_zone_management_done, bio);
    8423             :         default:
    8424           0 :                 return -EINVAL;
    8425             :         }
    8426           0 : }
    8427             : 
    8428             : static void
    8429           5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    8430             :                          struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
    8431             : {
                               /* Send a raw admin command through the first usable controller on the
                                * channel's I/O path list.
                                *
                                * Nothing is returned. On successful submission the request is finished
                                * later through bdev_nvme_admin_passthru_done; in every other case bio is
                                * completed here via bdev_nvme_admin_complete() with:
                                *  - -ENXIO if no available controller was found (the initial rc),
                                *  - -EINVAL if nbytes exceeds the chosen controller's max transfer size
                                *    (no further paths are tried),
                                *  - otherwise the rc of the last failed submission attempt.
                                */
    8432             :         struct nvme_io_path *io_path;
    8433             :         struct nvme_ctrlr *nvme_ctrlr;
    8434             :         uint32_t max_xfer_size;
    8435           5 :         int rc = -ENXIO;
    8436             : 
    8437             :         /* Choose the first ctrlr which is not failed. */
    8438           8 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    8439           7 :                 nvme_ctrlr = io_path->qpair->ctrlr;
    8440             : 
    8441             :                 /* We should skip any unavailable nvme_ctrlr rather than checking
    8442             :                  * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
    8443             :                  */
    8444           7 :                 if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    8445           3 :                         continue;
    8446             :                 }
    8447             : 
    8448           4 :                 max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
    8449             : 
                                       /* Checked before the uint32_t narrowing of nbytes below. */
    8450           4 :                 if (nbytes > max_xfer_size) {
    8451           0 :                         SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8452           0 :                         rc = -EINVAL;
    8453           0 :                         goto err;
    8454             :                 }
    8455             : 
    8456           8 :                 rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
    8457           4 :                                                    bdev_nvme_admin_passthru_done, bio);
                                       /* On a submission error, fall through and try the next I/O path. */
    8458           4 :                 if (rc == 0) {
    8459           4 :                         return;
    8460             :                 }
    8461           1 :         }
    8462             : 
    8463             : err:
    8464           1 :         bdev_nvme_admin_complete(bio, rc);
    8465           5 : }
    8466             : 
    8467             : static int
    8468           0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    8469             :                       void *buf, size_t nbytes)
    8470             : {
                               /* Submit a raw NVMe I/O command with a contiguous data buffer on the
                                * namespace and qpair of bio's current I/O path. Completion is reported
                                * through bdev_nvme_queued_done.
                                *
                                * cmd->nsid is overwritten with this namespace's id before submission.
                                *
                                * Returns -EINVAL if nbytes exceeds the namespace's max I/O transfer
                                * size, otherwise the rc of spdk_nvme_ctrlr_cmd_io_raw().
                                */
    8471           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8472           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8473           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8474           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8475             : 
                               /* Checked before the uint32_t narrowing of nbytes below. */
    8476           0 :         if (nbytes > max_xfer_size) {
    8477           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8478           0 :                 return -EINVAL;
    8479             :         }
    8480             : 
    8481             :         /*
    8482             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    8483             :          * so fill it out automatically.
    8484             :          */
    8485           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8486             : 
    8487           0 :         return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
    8488           0 :                                           (uint32_t)nbytes, bdev_nvme_queued_done, bio);
    8489           0 : }
    8490             : 
    8491             : static int
    8492           0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    8493             :                          void *buf, size_t nbytes, void *md_buf, size_t md_len)
    8494             : {
                               /* Submit a raw NVMe I/O command with a contiguous data buffer and a
                                * separate metadata buffer on the namespace and qpair of bio's current
                                * I/O path. Completion is reported through bdev_nvme_queued_done.
                                *
                                * cmd->nsid is overwritten with this namespace's id before submission.
                                *
                                * Returns -EINVAL if nbytes exceeds the namespace's max I/O transfer size
                                * or if md_len does not match the expected metadata size, otherwise the
                                * rc of spdk_nvme_ctrlr_cmd_io_raw_with_md().
                                */
    8495           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8496           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
                               /* Sector count is derived from nbytes using the extended sector size;
                                * integer division drops any partial sector.
                                */
    8497           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    8498           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8499           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8500             : 
    8501           0 :         if (nbytes > max_xfer_size) {
    8502           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8503           0 :                 return -EINVAL;
    8504             :         }
    8505             : 
                               /* The metadata buffer must be exactly one md_size per sector. */
    8506           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    8507           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    8508           0 :                 return -EINVAL;
    8509             :         }
    8510             : 
    8511             :         /*
    8512             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    8513             :          * so fill it out automatically.
    8514             :          */
    8515           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8516             : 
    8517           0 :         return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
    8518           0 :                         (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
    8519           0 : }
    8520             : 
    8521             : static int
    8522           0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
    8523             :                           struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
    8524             :                           size_t nbytes, void *md_buf, size_t md_len)
    8525             : {
                               /* Submit a raw NVMe I/O command whose data is described by an iovec
                                * array, with a separate metadata buffer, on the namespace and qpair of
                                * bio's current I/O path. Completion is reported through
                                * bdev_nvme_queued_done.
                                *
                                * cmd->nsid is overwritten with this namespace's id before submission.
                                *
                                * Returns -EINVAL if nbytes exceeds the namespace's max I/O transfer size
                                * or if md_len does not match the expected metadata size, otherwise the
                                * rc of spdk_nvme_ctrlr_cmd_iov_raw_with_md().
                                */
    8526           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8527           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8528           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    8529           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8530           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8531             : 
                               /* Initialize the iovec cursor kept in bio. Note this happens before the
                                * validation below, so bio is modified even when -EINVAL is returned.
                                */
    8532           0 :         bio->iovs = iov;
    8533           0 :         bio->iovcnt = iovcnt;
    8534           0 :         bio->iovpos = 0;
    8535           0 :         bio->iov_offset = 0;
    8536             : 
    8537           0 :         if (nbytes > max_xfer_size) {
    8538           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8539           0 :                 return -EINVAL;
    8540             :         }
    8541             : 
                               /* The metadata buffer must be exactly one md_size per sector. */
    8542           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    8543           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    8544           0 :                 return -EINVAL;
    8545             :         }
    8546             : 
    8547             :         /*
    8548             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
    8549             :          * require a nsid, so fill it out automatically.
    8550             :          */
    8551           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8552             : 
    8553           0 :         return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
    8554           0 :                        ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
    8555             :                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
    8556           0 : }
    8557             : 
    8558             : static void
    8559           6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    8560             :                 struct nvme_bdev_io *bio_to_abort)
    8561             : {
                               /* Abort bio_to_abort on behalf of the abort request bio.
                                *
                                * Order of attempts:
                                *  1. bdev_nvme_abort_retry_io(); if it returns 0 the abort request is
                                *     completed successfully right away.
                                *  2. If bio_to_abort has an io_path, send an abort for it on that path's
                                *     controller and qpair.
                                *  3. Otherwise send an abort with a NULL qpair to each controller on the
                                *     channel's path list in turn, stopping at the first result other
                                *     than -ENOENT.
                                *
                                * Nothing is returned. A non-zero final rc completes bio with that rc
                                * here; on rc == 0 the request is left to bdev_nvme_abort_done.
                                */
    8562             :         struct nvme_io_path *io_path;
    8563           6 :         int rc = 0;
    8564             : 
    8565           6 :         rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
    8566           6 :         if (rc == 0) {
    8567           1 :                 bdev_nvme_admin_complete(bio, 0);
    8568           1 :                 return;
    8569             :         }
    8570             : 
    8571           5 :         io_path = bio_to_abort->io_path;
    8572           5 :         if (io_path != NULL) {
    8573           6 :                 rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8574           3 :                                                    io_path->qpair->qpair,
    8575           3 :                                                    bio_to_abort,
    8576           3 :                                                    bdev_nvme_abort_done, bio);
    8577           3 :         } else {
                                       /* If the path list is empty, rc keeps the non-zero value returned
                                        * by bdev_nvme_abort_retry_io() above and the abort fails with it.
                                        */
    8578           3 :                 STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    8579           4 :                         rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8580             :                                                            NULL,
    8581           2 :                                                            bio_to_abort,
    8582           2 :                                                            bdev_nvme_abort_done, bio);
    8583             : 
                                               /* -ENOENT means keep looking on the next controller. */
    8584           2 :                         if (rc != -ENOENT) {
    8585           1 :                                 break;
    8586             :                         }
    8587           1 :                 }
    8588             :         }
    8589             : 
    8590           5 :         if (rc != 0) {
    8591             :                 /* If no command was found or there was any error, complete the abort
    8592             :                  * request with failure.
    8593             :                  */
    8594           2 :                 bdev_nvme_admin_complete(bio, rc);
    8595           2 :         }
    8596           6 : }
    8597             : 
    8598             : static int
    8599           0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
    8600             :                uint64_t num_blocks)
    8601             : {
                               /* Submit an NVMe Copy command with a single source range that copies
                                * num_blocks blocks from src_offset_blocks to dst_offset_blocks on the
                                * namespace and qpair of bio's current I/O path. Completion is reported
                                * through bdev_nvme_queued_done.
                                *
                                * Returns the rc of spdk_nvme_ns_cmd_copy().
                                *
                                * nlb is stored as num_blocks - 1; presumably the field is a zero-based
                                * block count -- TODO confirm against the source range definition.
                                * NOTE(review): num_blocks == 0 would wrap here; confirm callers never
                                * pass 0.
                                */
    8602           0 :         struct spdk_nvme_scc_source_range range = {
    8603           0 :                 .slba = src_offset_blocks,
    8604           0 :                 .nlb = num_blocks - 1
    8605             :         };
    8606             : 
    8607           0 :         return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
    8608           0 :                                      bio->io_path->qpair->qpair,
    8609           0 :                                      &range, 1, dst_offset_blocks,
    8610           0 :                                      bdev_nvme_queued_done, bio);
    8611             : }
    8612             : 
    8613             : static void
    8614           0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
    8615             : {
                               /* Write one JSON object of the form
                                *   { "method": "bdev_nvme_set_options", "params": { ... } }
                                * to w, where params mirrors the current values in the global g_opts.
                                */
    8616             :         const char *action;
    8617             :         uint32_t i;
    8618             : 
                               /* Map the timeout action enum to its string form; any value other than
                                * RESET or ABORT is written as "none".
                                */
    8619           0 :         if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
    8620           0 :                 action = "reset";
    8621           0 :         } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
    8622           0 :                 action = "abort";
    8623           0 :         } else {
    8624           0 :                 action = "none";
    8625             :         }
    8626             : 
    8627           0 :         spdk_json_write_object_begin(w);
    8628             : 
    8629           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
    8630             : 
    8631           0 :         spdk_json_write_named_object_begin(w, "params");
    8632           0 :         spdk_json_write_named_string(w, "action_on_timeout", action);
    8633           0 :         spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
    8634           0 :         spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
    8635           0 :         spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
    8636           0 :         spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
    8637           0 :         spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
    8638           0 :         spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
    8639           0 :         spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
    8640           0 :         spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
    8641           0 :         spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
    8642           0 :         spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
    8643           0 :         spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
    8644           0 :         spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
    8645           0 :         spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
    8646           0 :         spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
    8647           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
    8648           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
    8649           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
    8650           0 :         spdk_json_write_named_bool(w, "disable_auto_failback", g_opts.disable_auto_failback);
    8651           0 :         spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
    8652           0 :         spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
    8653           0 :         spdk_json_write_named_bool(w, "nvme_error_stat", g_opts.nvme_error_stat);
    8654           0 :         spdk_json_write_named_uint32(w, "rdma_srq_size", g_opts.rdma_srq_size);
    8655           0 :         spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
    8656           0 :         spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
    8657           0 :         spdk_json_write_named_uint32(w, "rdma_max_cq_size", g_opts.rdma_max_cq_size);
    8658           0 :         spdk_json_write_named_uint16(w, "rdma_cm_event_timeout_ms", g_opts.rdma_cm_event_timeout_ms);
                               /* dhchap_digests and dhchap_dhgroups are tested bit by bit over 32 bit
                                * positions; each set bit i is written as the name returned for i.
                                */
    8659           0 :         spdk_json_write_named_array_begin(w, "dhchap_digests");
    8660           0 :         for (i = 0; i < 32; ++i) {
    8661           0 :                 if (g_opts.dhchap_digests & SPDK_BIT(i)) {
    8662           0 :                         spdk_json_write_string(w, spdk_nvme_dhchap_get_digest_name(i));
    8663           0 :                 }
    8664           0 :         }
    8665           0 :         spdk_json_write_array_end(w);
    8666           0 :         spdk_json_write_named_array_begin(w, "dhchap_dhgroups");
    8667           0 :         for (i = 0; i < 32; ++i) {
    8668           0 :                 if (g_opts.dhchap_dhgroups & SPDK_BIT(i)) {
    8669           0 :                         spdk_json_write_string(w, spdk_nvme_dhchap_get_dhgroup_name(i));
    8670           0 :                 }
    8671           0 :         }
    8672             : 
    8673           0 :         spdk_json_write_array_end(w);
                               /* Closes "params". */
    8674           0 :         spdk_json_write_object_end(w);
    8675             : 
                               /* Closes the outer method object. */
    8676           0 :         spdk_json_write_object_end(w);
    8677           0 : }
    8678             : 
    8679             : static void
    8680           0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
    8681             : {
                               /* Write one JSON object of the form
                                *   { "method": "bdev_nvme_start_discovery", "params": { ... } }
                                * to w, describing the discovery context ctx: its name, hostnqn,
                                * transport ID, wait_for_attach flag and the three reconnect-related
                                * values from ctx->bdev_opts.
                                */
    8682             :         struct spdk_nvme_transport_id trid;
    8683             : 
    8684           0 :         spdk_json_write_object_begin(w);
    8685             : 
    8686           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
    8687             : 
    8688           0 :         spdk_json_write_named_object_begin(w, "params");
    8689           0 :         spdk_json_write_named_string(w, "name", ctx->name);
    8690           0 :         spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
    8691             : 
                               /* Work on a local copy so ctx->trid is left untouched, and blank the
                                * subnqn before dumping it. How an empty subnqn is rendered depends on
                                * nvme_bdev_dump_trid_json(), which is not visible here.
                                */
    8692           0 :         trid = ctx->trid;
    8693           0 :         memset(trid.subnqn, 0, sizeof(trid.subnqn));
    8694           0 :         nvme_bdev_dump_trid_json(&trid, w);
    8695             : 
    8696           0 :         spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
    8697           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
    8698           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
    8699           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8700           0 :                                      ctx->bdev_opts.fast_io_fail_timeout_sec);
                               /* Closes "params". */
    8701           0 :         spdk_json_write_object_end(w);
    8702             : 
                               /* Closes the outer method object. */
    8703           0 :         spdk_json_write_object_end(w);
    8704           0 : }
    8705             : 
    8706             : #ifdef SPDK_CONFIG_NVME_CUSE
    8707             : static void
    8708             : nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
    8709             :                             struct nvme_ctrlr *nvme_ctrlr)
    8710             : {
                               /* Write a {"method": "bdev_nvme_cuse_register", "params": {"name": ...}}
                                * object for nvme_ctrlr, using the name of its nbdev_ctrlr. Nothing is
                                * written when spdk_nvme_cuse_get_ctrlr_name() returns non-zero.
                                * NOTE(review): presumably a non-zero return means no CUSE device is
                                * registered for this controller - confirm against the CUSE API.
                                */
    8711             :         size_t cuse_name_size = 128;
    8712             :         char cuse_name[cuse_name_size];
    8713             : 
                               /* cuse_name is scratch output only; the call's return code decides
                                * whether an RPC is emitted and the retrieved name is not written out.
                                */
    8714             :         if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
    8715             :                                           cuse_name, &cuse_name_size) != 0) {
    8716             :                 return;
    8717             :         }
    8718             : 
    8719             :         spdk_json_write_object_begin(w);
    8720             : 
    8721             :         spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");
    8722             : 
    8723             :         spdk_json_write_named_object_begin(w, "params");
    8724             :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8725             :         spdk_json_write_object_end(w);
    8726             : 
    8727             :         spdk_json_write_object_end(w);
    8728             : }
    8729             : #endif
    8730             : 
    8731             : static void
    8732           0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
    8733             :                        struct nvme_ctrlr *nvme_ctrlr,
    8734             :                        struct nvme_path_id *path_id)
    8735             : {
                               /* Write a {"method": "bdev_nvme_attach_controller", "params": {...}}
                                * object describing one path (path_id) of nvme_ctrlr.
                                *
                                * w          - JSON write context receiving the object.
                                * nvme_ctrlr - controller whose options and keys are reported.
                                * path_id    - path whose transport ID is dumped.
                                *
                                * Nothing is written for controllers flagged from_discovery_service.
                                */
    8736             :         struct spdk_nvme_transport_id   *trid;
    8737             :         const struct spdk_nvme_ctrlr_opts *opts;
    8738             : 
    8739           0 :         if (nvme_ctrlr->opts.from_discovery_service) {
    8740             :                 /* Do not emit an RPC for this - it will be implicitly
    8741             :                  * covered by a separate bdev_nvme_start_discovery or
    8742             :                  * bdev_nvme_start_mdns_discovery RPC.
    8743             :                  */
    8744           0 :                 return;
    8745             :         }
    8746             : 
    8747           0 :         trid = &path_id->trid;
    8748             : 
    8749           0 :         spdk_json_write_object_begin(w);
    8750             : 
    8751           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
    8752             : 
    8753           0 :         spdk_json_write_named_object_begin(w, "params");
    8754           0 :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8755           0 :         nvme_bdev_dump_trid_json(trid, w);
                               /* The two prchk booleans are derived from individual bits of prchk_flags. */
    8756           0 :         spdk_json_write_named_bool(w, "prchk_reftag",
    8757           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
    8758           0 :         spdk_json_write_named_bool(w, "prchk_guard",
    8759           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
    8760           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
    8761           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
    8762           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8763           0 :                                      nvme_ctrlr->opts.fast_io_fail_timeout_sec);
                               /* Keys are reported by keyring name, and only when one is set. */
    8764           0 :         if (nvme_ctrlr->psk != NULL) {
    8765           0 :                 spdk_json_write_named_string(w, "psk", spdk_key_get_name(nvme_ctrlr->psk));
    8766           0 :         }
    8767           0 :         if (nvme_ctrlr->dhchap_key != NULL) {
    8768           0 :                 spdk_json_write_named_string(w, "dhchap_key",
    8769           0 :                                              spdk_key_get_name(nvme_ctrlr->dhchap_key));
    8770           0 :         }
    8771           0 :         if (nvme_ctrlr->dhchap_ctrlr_key != NULL) {
    8772           0 :                 spdk_json_write_named_string(w, "dhchap_ctrlr_key",
    8773           0 :                                              spdk_key_get_name(nvme_ctrlr->dhchap_ctrlr_key));
    8774           0 :         }
                               /* The remaining fields come from the options of the underlying
                                * spdk_nvme_ctrlr rather than from nvme_ctrlr->opts.
                                */
    8775           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    8776           0 :         spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
    8777           0 :         spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
    8778           0 :         spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
                               /* hostaddr and hostsvcid are omitted when the corresponding string is empty. */
    8779           0 :         if (opts->src_addr[0] != '\0') {
    8780           0 :                 spdk_json_write_named_string(w, "hostaddr", opts->src_addr);
    8781           0 :         }
    8782           0 :         if (opts->src_svcid[0] != '\0') {
    8783           0 :                 spdk_json_write_named_string(w, "hostsvcid", opts->src_svcid);
    8784           0 :         }
    8785             : 
                               /* "multipath" is written as the string value "multipath", not as a bool. */
    8786           0 :         if (nvme_ctrlr->opts.multipath) {
    8787           0 :                 spdk_json_write_named_string(w, "multipath", "multipath");
    8788           0 :         }
    8789           0 :         spdk_json_write_object_end(w);
    8790             : 
    8791           0 :         spdk_json_write_object_end(w);
    8792           0 : }
    8793             : 
    8794             : static void
    8795           0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
    8796             : {
                               /* Write a {"method": "bdev_nvme_set_hotplug", "params": {...}} object
                                * whose params report the globals g_nvme_hotplug_poll_period_us
                                * ("period_us") and g_nvme_hotplug_enabled ("enable").
                                */
    8797           0 :         spdk_json_write_object_begin(w);
    8798           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
    8799             : 
    8800           0 :         spdk_json_write_named_object_begin(w, "params");
    8801           0 :         spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
    8802           0 :         spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
    8803           0 :         spdk_json_write_object_end(w);
    8804             : 
    8805           0 :         spdk_json_write_object_end(w);
    8806           0 : }
    8807             : 
    8808             : static int
    8809           0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
    8810             : {
                               /* Write the module's configuration to w as a sequence of RPC objects,
                                * in this order: module options, one attach RPC per controller path
                                * (plus a CUSE RPC per controller when SPDK_CONFIG_NVME_CUSE is defined),
                                * discovery services, mDNS discovery and finally hotplug.
                                *
                                * Everything after the module options is written while holding
                                * g_bdev_nvme_mutex. Always returns 0.
                                */
    8811             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    8812             :         struct nvme_ctrlr       *nvme_ctrlr;
    8813             :         struct discovery_ctx    *ctx;
    8814             :         struct nvme_path_id     *path_id;
    8815             : 
    8816           0 :         bdev_nvme_opts_config_json(w);
    8817             : 
    8818           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    8819             : 
    8820           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    8821           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
                                               /* The active path is expected to be the head of the trids
                                                * list; the walk below starts from it, so that is what
                                                * makes it visit every path of the controller.
                                                */
    8822           0 :                         path_id = nvme_ctrlr->active_path_id;
    8823           0 :                         assert(path_id == TAILQ_FIRST(&nvme_ctrlr->trids));
    8824           0 :                         nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
    8825             : 
                                               /* Emit one attach RPC for each remaining path. */
    8826           0 :                         path_id = TAILQ_NEXT(path_id, link);
    8827           0 :                         while (path_id != NULL) {
    8828           0 :                                 nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
    8829           0 :                                 path_id = TAILQ_NEXT(path_id, link);
    8830             :                         }
    8831             : 
    8832             : #ifdef SPDK_CONFIG_NVME_CUSE
    8833             :                         nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
    8834             : #endif
    8835           0 :                 }
    8836           0 :         }
    8837             : 
                               /* Contexts flagged from_mdns_discovery_service are skipped here.
                                * NOTE(review): presumably they are covered by
                                * bdev_nvme_mdns_discovery_config_json() below - confirm.
                                */
    8838           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    8839           0 :                 if (!ctx->from_mdns_discovery_service) {
    8840           0 :                         bdev_nvme_discovery_config_json(w, ctx);
    8841           0 :                 }
    8842           0 :         }
    8843             : 
    8844           0 :         bdev_nvme_mdns_discovery_config_json(w);
    8845             : 
    8846             :         /* Dump as last parameter to give all NVMe bdevs chance to be constructed
    8847             :          * before enabling hotplug poller.
    8848             :          */
    8849           0 :         bdev_nvme_hotplug_config_json(w);
    8850             : 
    8851           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    8852           0 :         return 0;
    8853             : }
    8854             : 
    8855             : struct spdk_nvme_ctrlr *
    8856           1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
    8857             : {
                               /* Return the spdk_nvme_ctrlr behind the first namespace of an NVMe bdev.
                                *
                                * Returns NULL when bdev is NULL or when its module is not nvme_if.
                                * Otherwise the bdev is treated as the `disk` member of a struct nvme_bdev
                                * and the controller of the first entry of its nvme_ns_list is returned.
                                * The list is asserted to be non-empty.
                                */
    8858             :         struct nvme_bdev *nbdev;
    8859             :         struct nvme_ns *nvme_ns;
    8860             : 
    8861           1 :         if (!bdev || bdev->module != &nvme_if) {
    8862           0 :                 return NULL;
    8863             :         }
    8864             : 
    8865           1 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    8866           1 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    8867           1 :         assert(nvme_ns != NULL);
    8868             : 
    8869           1 :         return nvme_ns->ctrlr->ctrlr;
    8870           1 : }
    8871             : 
    8872             : static bool
    8873          12 : nvme_io_path_is_current(struct nvme_io_path *io_path)
    8874             : {
                               /* Report whether io_path counts as a "current" path of its bdev channel.
                                *
                                * Returns false when the path is not available or has no nbdev_ch.
                                * For the active-active policy a path is current when its namespace is in
                                * the ANA optimized state, or when no path on the channel is optimized.
                                * For any other policy the path is current when it equals the channel's
                                * current_io_path or, if that is unset, when it is the first available
                                * path on the channel's io_path_list.
                                */
    8875             :         const struct nvme_bdev_channel *nbdev_ch;
    8876             :         bool current;
    8877             : 
    8878          12 :         if (!nvme_io_path_is_available(io_path)) {
    8879           4 :                 return false;
    8880             :         }
    8881             : 
    8882           8 :         nbdev_ch = io_path->nbdev_ch;
    8883           8 :         if (nbdev_ch == NULL) {
    8884           1 :                 current = false;
    8885           8 :         } else if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    8886           3 :                 struct nvme_io_path *optimized_io_path = NULL;
    8887             : 
                                       /* Stop at the first optimized path. If the loop runs to completion,
                                        * optimized_io_path ends up NULL, which the check below relies on.
                                        */
    8888           6 :                 STAILQ_FOREACH(optimized_io_path, &nbdev_ch->io_path_list, stailq) {
    8889           5 :                         if (optimized_io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) {
    8890           2 :                                 break;
    8891             :                         }
    8892           3 :                 }
    8893             : 
    8894             :                 /* A non-optimized path is only current if there are no optimized paths. */
    8895           3 :                 current = (io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) ||
    8896           2 :                           (optimized_io_path == NULL);
    8897           3 :         } else {
    8898           4 :                 if (nbdev_ch->current_io_path) {
    8899           1 :                         current = (io_path == nbdev_ch->current_io_path);
    8900           1 :                 } else {
    8901             :                         struct nvme_io_path *first_path;
    8902             : 
    8903             :                         /* We arrived here as there are no optimized paths for active-passive
    8904             :                          * mode. Check if this io_path is the first one available on the list.
    8905             :                          */
    8906           3 :                         current = false;
    8907           3 :                         STAILQ_FOREACH(first_path, &nbdev_ch->io_path_list, stailq) {
    8908           3 :                                 if (nvme_io_path_is_available(first_path)) {
    8909           3 :                                         current = (io_path == first_path);
    8910           3 :                                         break;
    8911             :                                 }
    8912           0 :                         }
    8913             :                 }
    8914             :         }
    8915             : 
    8916           8 :         return current;
    8917          12 : }
    8918             : 
    8919             : static struct nvme_ctrlr *
    8920           0 : bdev_nvme_next_ctrlr_unsafe(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct nvme_ctrlr *prev)
    8921             : {
                               /* Find the next nvme_ctrlr with a positive reference count and take a
                                * reference on it.
                                *
                                * The search starts after prev when prev is non-NULL, otherwise at the
                                * head of nbdev_ctrlr->ctrlrs; nbdev_ctrlr is therefore only read when
                                * prev is NULL. Controllers whose ref is 0 are skipped.
                                *
                                * Returns the controller with its ref incremented, or NULL when no such
                                * controller remains.
                                */
    8922             :         struct nvme_ctrlr *next;
    8923             : 
    8924             :         /* Must be called under g_bdev_nvme_mutex */
    8925           0 :         next = prev != NULL ? TAILQ_NEXT(prev, tailq) : TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
    8926           0 :         while (next != NULL) {
    8927             :                 /* ref can be 0 when the ctrlr was released, but hasn't been detached yet */
    8928           0 :                 pthread_mutex_lock(&next->mutex);
    8929           0 :                 if (next->ref > 0) {
    8930           0 :                         next->ref++;
    8931           0 :                         pthread_mutex_unlock(&next->mutex);
    8932           0 :                         return next;
    8933             :                 }
    8934             : 
    8935           0 :                 pthread_mutex_unlock(&next->mutex);
    8936           0 :                 next = TAILQ_NEXT(next, tailq);
    8937             :         }
    8938             : 
    8939           0 :         return NULL;
    8940           0 : }
    8941             : 
    8942             : struct bdev_nvme_set_keys_ctx {
                               /* State carried through one bdev_nvme_set_keys() operation. */
                               /* Controller currently being processed; released when the operation ends. */
    8943             :         struct nvme_ctrlr       *nctrlr;
                               /* Keys obtained from the keyring; either may be NULL. Both are put when
                                * the context is freed.
                                */
    8944             :         struct spdk_key         *dhchap_key;
    8945             :         struct spdk_key         *dhchap_ctrlr_key;
                               /* Thread that called bdev_nvme_set_keys(); cb_fn is executed on it. */
    8946             :         struct spdk_thread      *thread;
                               /* Completion callback and its argument. */
    8947             :         bdev_nvme_set_keys_cb   cb_fn;
    8948             :         void                    *cb_ctx;
                               /* Result passed to cb_fn. */
    8949             :         int                     status;
    8950             : };
    8951             : 
    8952             : static void
    8953           0 : bdev_nvme_free_set_keys_ctx(struct bdev_nvme_set_keys_ctx *ctx)
    8954             : {
                               /* Drop the key references held by ctx and free it. A NULL ctx is a no-op.
                                * This does not release ctx->nctrlr.
                                */
    8955           0 :         if (ctx == NULL) {
    8956           0 :                 return;
    8957             :         }
    8958             : 
                               /* NOTE(review): either key may still be NULL here, because the context is
                                * calloc'ed and each key is optional - presumably spdk_keyring_put_key()
                                * accepts NULL; confirm against the keyring API.
                                */
    8959           0 :         spdk_keyring_put_key(ctx->dhchap_key);
    8960           0 :         spdk_keyring_put_key(ctx->dhchap_ctrlr_key);
    8961           0 :         free(ctx);
    8962           0 : }
    8963             : 
    8964             : static void
    8965           0 : _bdev_nvme_set_keys_done(void *_ctx)
    8966             : {
                               /* Final step of a set-keys operation: invoke the user callback with the
                                * stored status, release the controller reference still held in
                                * ctx->nctrlr (if any) and free the context. bdev_nvme_set_keys_done()
                                * schedules this on ctx->thread.
                                */
    8967           0 :         struct bdev_nvme_set_keys_ctx *ctx = _ctx;
    8968             : 
    8969           0 :         ctx->cb_fn(ctx->cb_ctx, ctx->status);
    8970             : 
    8971           0 :         if (ctx->nctrlr != NULL) {
    8972           0 :                 nvme_ctrlr_release(ctx->nctrlr);
    8973           0 :         }
    8974           0 :         bdev_nvme_free_set_keys_ctx(ctx);
    8975           0 : }
    8976             : 
    8977             : static void
    8978           0 : bdev_nvme_set_keys_done(struct bdev_nvme_set_keys_ctx *ctx, int status)
    8979             : {
                               /* Record status in ctx and hand completion over to ctx->thread, where
                                * _bdev_nvme_set_keys_done() calls the user callback and frees ctx.
                                */
    8980           0 :         ctx->status = status;
    8981           0 :         spdk_thread_exec_msg(ctx->thread, _bdev_nvme_set_keys_done, ctx);
    8982           0 : }
    8983             : 
    8984             : static void bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx);
    8985             : 
    8986             : static void
    8987           0 : bdev_nvme_authenticate_ctrlr_continue(struct bdev_nvme_set_keys_ctx *ctx)
    8988             : {
                               /* Move the set-keys operation on to the next controller after
                                * ctx->nctrlr. The current controller's reference is released; if no
                                * further controller is found the operation completes with status 0,
                                * otherwise the next controller is processed.
                                */
    8989             :         struct nvme_ctrlr *next;
    8990             : 
                               /* NULL is passed for nbdev_ctrlr: bdev_nvme_next_ctrlr_unsafe() only
                                * reads it when prev is NULL, so this relies on ctx->nctrlr being
                                * non-NULL here.
                                */
    8991           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    8992           0 :         next = bdev_nvme_next_ctrlr_unsafe(NULL, ctx->nctrlr);
    8993           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    8994             : 
                               /* The old controller is released only after the next one was looked up
                                * through it.
                                */
    8995           0 :         nvme_ctrlr_release(ctx->nctrlr);
    8996           0 :         ctx->nctrlr = next;
    8997             : 
    8998           0 :         if (next == NULL) {
    8999           0 :                 bdev_nvme_set_keys_done(ctx, 0);
    9000           0 :         } else {
    9001           0 :                 bdev_nvme_authenticate_ctrlr(ctx);
    9002             :         }
    9003           0 : }
    9004             : 
    9005             : static void
    9006           0 : bdev_nvme_authenticate_qpairs_done(struct spdk_io_channel_iter *i, int status)
    9007             : {
                               /* Completion callback of the per-channel iteration started in
                                * bdev_nvme_authenticate_ctrlr_done(). A non-zero status ends the whole
                                * operation with that status; otherwise the next controller is handled.
                                */
    9008           0 :         struct bdev_nvme_set_keys_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
    9009             : 
    9010           0 :         if (status != 0) {
    9011           0 :                 bdev_nvme_set_keys_done(ctx, status);
    9012           0 :                 return;
    9013             :         }
    9014           0 :         bdev_nvme_authenticate_ctrlr_continue(ctx);
    9015           0 : }
    9016             : 
    9017             : static void
    9018           0 : bdev_nvme_authenticate_qpair_done(void *ctx, int status)
    9019             : {
                               /* Callback given to spdk_nvme_qpair_authenticate(). ctx is the channel
                                * iterator that bdev_nvme_authenticate_qpair() passed as the callback
                                * argument; the iteration is resumed with the authentication status.
                                */
    9020           0 :         spdk_for_each_channel_continue(ctx, status);
    9021           0 : }
    9022             : 
    9023             : static void
    9024           0 : bdev_nvme_authenticate_qpair(struct spdk_io_channel_iter *i)
    9025             : {
                               /* Per-channel step of the iteration: start authentication of the
                                * channel's qpair. Qpairs that are not connected are skipped with
                                * status 0. When authentication was started successfully the iteration
                                * is resumed later from bdev_nvme_authenticate_qpair_done().
                                */
    9026           0 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
    9027           0 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
    9028           0 :         struct nvme_qpair *qpair = ctrlr_ch->qpair;
    9029             :         int rc;
    9030             : 
    9031           0 :         if (!nvme_qpair_is_connected(qpair)) {
    9032           0 :                 spdk_for_each_channel_continue(i, 0);
    9033           0 :                 return;
    9034             :         }
    9035             : 
    9036           0 :         rc = spdk_nvme_qpair_authenticate(qpair->qpair, bdev_nvme_authenticate_qpair_done, i);
    9037           0 :         if (rc != 0) {
                                       /* Starting authentication failed: resume the iteration here, passing
                                        * rc as its status.
                                        */
    9038           0 :                 spdk_for_each_channel_continue(i, rc);
    9039           0 :         }
    9040           0 : }
    9041             : 
    9042             : static void
    9043           0 : bdev_nvme_authenticate_ctrlr_done(void *_ctx, int status)
    9044             : {
                               /* Callback given to spdk_nvme_ctrlr_authenticate(). A non-zero status ends
                                * the operation with that status; otherwise every channel of ctx->nctrlr
                                * is visited with bdev_nvme_authenticate_qpair().
                                */
    9045           0 :         struct bdev_nvme_set_keys_ctx *ctx = _ctx;
    9046             : 
    9047           0 :         if (status != 0) {
    9048           0 :                 bdev_nvme_set_keys_done(ctx, status);
    9049           0 :                 return;
    9050             :         }
    9051             : 
    9052           0 :         spdk_for_each_channel(ctx->nctrlr, bdev_nvme_authenticate_qpair, ctx,
    9053             :                               bdev_nvme_authenticate_qpairs_done);
    9054           0 : }
    9055             : 
    9056             : static void
    9057           0 : bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx)
    9058             : {
                               /* Apply the keys held in ctx to the controller ctx->nctrlr.
                                *
                                * If setting the keys fails, the operation ends with that error. If a
                                * dhchap_key is present, controller authentication is started and the
                                * flow continues in bdev_nvme_authenticate_ctrlr_done(); without one,
                                * the next controller is processed right away.
                                */
    9059           0 :         struct spdk_nvme_ctrlr_key_opts opts = {};
    9060           0 :         struct nvme_ctrlr *nctrlr = ctx->nctrlr;
    9061             :         int rc;
    9062             : 
    9063           0 :         opts.size = SPDK_SIZEOF(&opts, dhchap_ctrlr_key);
    9064           0 :         opts.dhchap_key = ctx->dhchap_key;
    9065           0 :         opts.dhchap_ctrlr_key = ctx->dhchap_ctrlr_key;
    9066           0 :         rc = spdk_nvme_ctrlr_set_keys(nctrlr->ctrlr, &opts);
    9067           0 :         if (rc != 0) {
    9068           0 :                 bdev_nvme_set_keys_done(ctx, rc);
    9069           0 :                 return;
    9070             :         }
    9071             : 
    9072           0 :         if (ctx->dhchap_key != NULL) {
    9073           0 :                 rc = spdk_nvme_ctrlr_authenticate(nctrlr->ctrlr,
    9074           0 :                                                   bdev_nvme_authenticate_ctrlr_done, ctx);
    9075           0 :                 if (rc != 0) {
    9076           0 :                         bdev_nvme_set_keys_done(ctx, rc);
    9077           0 :                 }
    9078           0 :         } else {
    9079           0 :                 bdev_nvme_authenticate_ctrlr_continue(ctx);
    9080             :         }
    9081           0 : }
    9082             : 
    9083             : int
    9084           0 : bdev_nvme_set_keys(const char *name, const char *dhchap_key, const char *dhchap_ctrlr_key,
    9085             :                    bdev_nvme_set_keys_cb cb_fn, void *cb_ctx)
    9086             : {
                               /* Start changing the DH-HMAC-CHAP keys of the controllers belonging to
                                * the nvme_bdev_ctrlr called name.
                                *
                                * name             - name used to look up the nvme_bdev_ctrlr.
                                * dhchap_key       - keyring name of the key, or NULL for none.
                                * dhchap_ctrlr_key - keyring name of the controller key, or NULL for none.
                                * cb_fn, cb_ctx    - completion callback and its argument.
                                *
                                * Returns 0 when the operation was started; its result is then reported
                                * through cb_fn on the calling thread. Returns -ENOMEM when the context
                                * cannot be allocated, -ENOKEY when a named key is not in the keyring and
                                * -ENODEV when name is unknown or has no referenced controller. cb_fn is
                                * not invoked when a negative value is returned.
                                */
    9087             :         struct bdev_nvme_set_keys_ctx *ctx;
    9088             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    9089             :         struct nvme_ctrlr *nctrlr;
    9090             : 
    9091           0 :         ctx = calloc(1, sizeof(*ctx));
    9092           0 :         if (ctx == NULL) {
    9093           0 :                 return -ENOMEM;
    9094             :         }
    9095             : 
                               /* Key references are stored in ctx, so that
                                * bdev_nvme_free_set_keys_ctx() drops them on every exit path.
                                */
    9096           0 :         if (dhchap_key != NULL) {
    9097           0 :                 ctx->dhchap_key = spdk_keyring_get_key(dhchap_key);
    9098           0 :                 if (ctx->dhchap_key == NULL) {
    9099           0 :                         SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_key, name);
    9100           0 :                         bdev_nvme_free_set_keys_ctx(ctx);
    9101           0 :                         return -ENOKEY;
    9102             :                 }
    9103           0 :         }
    9104           0 :         if (dhchap_ctrlr_key != NULL) {
    9105           0 :                 ctx->dhchap_ctrlr_key = spdk_keyring_get_key(dhchap_ctrlr_key);
    9106           0 :                 if (ctx->dhchap_ctrlr_key == NULL) {
    9107           0 :                         SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_ctrlr_key, name);
    9108           0 :                         bdev_nvme_free_set_keys_ctx(ctx);
    9109           0 :                         return -ENOKEY;
    9110             :                 }
    9111           0 :         }
    9112             : 
                               /* Look up the first referenced controller under the global mutex; the
                                * reference taken by bdev_nvme_next_ctrlr_unsafe() is kept in ctx->nctrlr.
                                */
    9113           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    9114           0 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    9115           0 :         if (nbdev_ctrlr == NULL) {
    9116           0 :                 SPDK_ERRLOG("Could not find bdev_ctrlr %s\n", name);
    9117           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    9118           0 :                 bdev_nvme_free_set_keys_ctx(ctx);
    9119           0 :                 return -ENODEV;
    9120             :         }
    9121           0 :         nctrlr = bdev_nvme_next_ctrlr_unsafe(nbdev_ctrlr, NULL);
    9122           0 :         if (nctrlr == NULL) {
    9123           0 :                 SPDK_ERRLOG("Could not find any nvme_ctrlrs on bdev_ctrlr %s\n", name);
    9124           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    9125           0 :                 bdev_nvme_free_set_keys_ctx(ctx);
    9126           0 :                 return -ENODEV;
    9127             :         }
    9128           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    9129             : 
    9130           0 :         ctx->nctrlr = nctrlr;
    9131           0 :         ctx->cb_fn = cb_fn;
    9132           0 :         ctx->cb_ctx = cb_ctx;
    9133           0 :         ctx->thread = spdk_get_thread();
    9134             : 
    9135           0 :         bdev_nvme_authenticate_ctrlr(ctx);
    9136             : 
    9137           0 :         return 0;
    9138           0 : }
    9139             : 
    9140             : void
    9141           0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
    9142             : {
                               /* Write one JSON object describing io_path: the bdev name, the
                                * controller's cntlid, the "current", "connected" and "accessible"
                                * flags, and a nested "transport" object built from the controller's
                                * transport ID.
                                */
    9143           0 :         struct nvme_ns *nvme_ns = io_path->nvme_ns;
    9144           0 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    9145             :         const struct spdk_nvme_ctrlr_data *cdata;
    9146             :         const struct spdk_nvme_transport_id *trid;
    9147             :         const char *adrfam_str;
    9148             : 
    9149           0 :         spdk_json_write_object_begin(w);
    9150             : 
    9151           0 :         spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
    9152             : 
    9153           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    9154           0 :         trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
    9155             : 
    9156           0 :         spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
    9157           0 :         spdk_json_write_named_bool(w, "current", nvme_io_path_is_current(io_path));
    9158           0 :         spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
    9159           0 :         spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
    9160             : 
    9161           0 :         spdk_json_write_named_object_begin(w, "transport");
    9162           0 :         spdk_json_write_named_string(w, "trtype", trid->trstring);
    9163           0 :         spdk_json_write_named_string(w, "traddr", trid->traddr);
                               /* trsvcid is omitted when empty; adrfam is omitted when no string is
                                * returned for it.
                                */
    9164           0 :         if (trid->trsvcid[0] != '\0') {
    9165           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
    9166           0 :         }
    9167           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
    9168           0 :         if (adrfam_str) {
    9169           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
    9170           0 :         }
    9171           0 :         spdk_json_write_object_end(w);
    9172             : 
    9173           0 :         spdk_json_write_object_end(w);
    9174           0 : }
    9175             : 
    9176             : void
    9177           0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
    9178             : {
                               /* Write a JSON array with one object per entry of g_discovery_ctxs. Each
                                * object holds the context's "name", its "trid" object and a "referrals"
                                * array containing one {"trid": {...}} object per entry of the context's
                                * discovery_entry_ctxs list.
                                *
                                * NOTE(review): g_discovery_ctxs is walked here without taking
                                * g_bdev_nvme_mutex, whereas bdev_nvme_config_json() walks it with the
                                * mutex held - confirm the caller provides the needed serialization.
                                */
    9179             :         struct discovery_ctx *ctx;
    9180             :         struct discovery_entry_ctx *entry_ctx;
    9181             : 
    9182           0 :         spdk_json_write_array_begin(w);
    9183           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    9184           0 :                 spdk_json_write_object_begin(w);
    9185           0 :                 spdk_json_write_named_string(w, "name", ctx->name);
    9186             : 
    9187           0 :                 spdk_json_write_named_object_begin(w, "trid");
    9188           0 :                 nvme_bdev_dump_trid_json(&ctx->trid, w);
    9189           0 :                 spdk_json_write_object_end(w);
    9190             : 
    9191           0 :                 spdk_json_write_named_array_begin(w, "referrals");
    9192           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    9193           0 :                         spdk_json_write_object_begin(w);
    9194           0 :                         spdk_json_write_named_object_begin(w, "trid");
    9195           0 :                         nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
    9196           0 :                         spdk_json_write_object_end(w);
    9197           0 :                         spdk_json_write_object_end(w);
    9198           0 :                 }
    9199           0 :                 spdk_json_write_array_end(w);
    9200             : 
    9201           0 :                 spdk_json_write_object_end(w);
    9202           0 :         }
    9203           0 :         spdk_json_write_array_end(w);
    9204           0 : }
    9205             : 
    9206           1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
    9207             : 
    9208             : static void
    9209           0 : bdev_nvme_trace(void)
    9210             : {
                               /* Register this module's trace definitions: the OBJECT_BDEV_NVME_IO
                                * object (registered with the character 'N'), the two tracepoints
                                * described in opts[], and four relations tying the NVMe PCIe/TCP
                                * submit and complete tracepoints to OBJECT_BDEV_NVME_IO.
                                *
                                * NOTE(review): the START and DONE entries differ in the integer that
                                * follows OBJECT_BDEV_NVME_IO (1 vs 0) - presumably it marks the
                                * tracepoint that creates the object; confirm against
                                * struct spdk_trace_tpoint_opts.
                                */
    9211           0 :         struct spdk_trace_tpoint_opts opts[] = {
    9212             :                 {
    9213             :                         "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
    9214             :                         OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 1,
    9215             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    9216             :                 },
    9217             :                 {
    9218             :                         "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
    9219             :                         OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 0,
    9220             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    9221             :                 }
    9222             :         };
    9223             : 
    9224             : 
    9225           0 :         spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
    9226           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
    9227           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    9228           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    9229           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    9230           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    9231           0 : }
                       /* Hooks bdev_nvme_trace up under the name "bdev_nvme" and TRACE_GROUP_BDEV_NVME. */
    9232           1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)

Generated by: LCOV version 1.15