LCOV - code coverage report
Current view: top level - module/bdev/nvme - bdev_nvme.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 2269 4216 53.8 %
Date: 2024-08-12 13:06:34 Functions: 215 308 69.8 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2016 Intel Corporation. All rights reserved.
       3             :  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
       4             :  *   Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  *   Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
       6             :  */
       7             : 
       8             : #include "spdk/stdinc.h"
       9             : 
      10             : #include "bdev_nvme.h"
      11             : 
      12             : #include "spdk/accel.h"
      13             : #include "spdk/config.h"
      14             : #include "spdk/endian.h"
      15             : #include "spdk/bdev.h"
      16             : #include "spdk/json.h"
      17             : #include "spdk/keyring.h"
      18             : #include "spdk/likely.h"
      19             : #include "spdk/nvme.h"
      20             : #include "spdk/nvme_ocssd.h"
      21             : #include "spdk/nvme_zns.h"
      22             : #include "spdk/opal.h"
      23             : #include "spdk/thread.h"
      24             : #include "spdk/trace.h"
      25             : #include "spdk/string.h"
      26             : #include "spdk/util.h"
      27             : #include "spdk/uuid.h"
      28             : 
      29             : #include "spdk/bdev_module.h"
      30             : #include "spdk/log.h"
      31             : 
      32             : #include "spdk_internal/usdt.h"
      33             : #include "spdk_internal/trace_defs.h"
      34             : 
                       /* Default value of g_opts.delay_cmd_submit. */
      35             : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
                       /* Default value of g_opts.keep_alive_timeout_ms (milliseconds, per the field name). */
      36             : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
      37             : 
                       /* NOTE(review): presumably the buffer length for a namespace ID rendered as a
                        * string; no user is visible in this part of the file - confirm.
                        */
      38             : #define NSID_STR_LEN 10
      39             : 
                       /* NOTE(review): presumably the maximum length of a controller name; no user is
                        * visible in this part of the file - confirm.
                        */
      40             : #define SPDK_CONTROLLER_NAME_MAX 512
      41             : 
                       /* Forward declaration: referenced by nvme_if.config_json before its definition. */
      42             : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
      43             : 
                       /* Per-I/O context of the NVMe bdev module. bdev_nvme_get_ctx_size() reports
                        * sizeof(struct nvme_bdev_io) through nvme_if.get_ctx_size.
                        */
      44             : struct nvme_bdev_io {
      45             :         /** array of iovecs to transfer. */
      46             :         struct iovec *iovs;
      47             : 
      48             :         /** Number of iovecs in iovs array. */
      49             :         int iovcnt;
      50             : 
      51             :         /** Current iovec position. */
      52             :         int iovpos;
      53             : 
      54             :         /** Offset in current iovec. */
      55             :         uint32_t iov_offset;
      56             : 
      57             :         /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
      58             :          *  being reset in a reset I/O.
      59             :          */
      60             :         struct nvme_io_path *io_path;
      61             : 
      62             :         /** array of iovecs for the fused command (presumably the write half of a fused compare-and-write - confirm). */
      63             :         struct iovec *fused_iovs;
      64             : 
      65             :         /** Number of iovecs in fused_iovs array. */
      66             :         int fused_iovcnt;
      67             : 
      68             :         /** Current iovec position in fused_iovs. */
      69             :         int fused_iovpos;
      70             : 
      71             :         /** Offset in current fused iovec. */
      72             :         uint32_t fused_iov_offset;
      73             : 
      74             :         /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
      75             :         struct spdk_nvme_cpl cpl;
      76             : 
      77             :         /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
      78             :         struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
      79             : 
      80             :         /** Keeps track if first of fused commands was submitted */
      81             :         bool first_fused_submitted;
      82             : 
      83             :         /** Keeps track if first of fused commands was completed */
      84             :         bool first_fused_completed;
      85             : 
      86             :         /** Temporary pointer to zone report buffer */
      87             :         struct spdk_nvme_zns_zone_report *zone_report_buf;
      88             : 
      89             :         /** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
      90             :         uint64_t handled_zones;
      91             : 
      92             :         /** Expiration value in ticks to retry the current I/O. */
      93             :         uint64_t retry_ticks;
      94             : 
      95             :         /* How many times the current I/O was retried. */
      96             :         int32_t retry_count;
      97             : 
      98             :         /* Current tsc at submit time. */
      99             :         uint64_t submit_tsc;
     100             : 
     101             :         /* TAILQ linkage for this nvme_bdev_io (presumably a retry list, given the name - confirm). */
     102             :         TAILQ_ENTRY(nvme_bdev_io) retry_link;
     103             : };
     104             : 
                       /* One element of g_skipped_nvme_ctrlrs: the transport ID of a controller to skip. */
     105             : struct nvme_probe_skip_entry {
     106             :         struct spdk_nvme_transport_id           trid;
     107             :         TAILQ_ENTRY(nvme_probe_skip_entry)      tailq;
     108             : };
     109             : /* All the controllers deleted by users via RPC are skipped by hotplug monitor */
     110             : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
     111             :                         g_skipped_nvme_ctrlrs);
     112             : 
                       /* Default bitmask for g_opts.dhchap_digests: the SHA256, SHA384 and SHA512
                        * DH-HMAC-CHAP hash identifiers, one bit each.
                        */
     113             : #define BDEV_NVME_DEFAULT_DIGESTS (SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256) | \
     114             :                                    SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA384) | \
     115             :                                    SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512))
     116             : 
                       /* Default bitmask for g_opts.dhchap_dhgroups: the NULL group plus the 2048,
                        * 3072, 4096, 6144 and 8192 DH groups, one bit each.
                        */
     117             : #define BDEV_NVME_DEFAULT_DHGROUPS (SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL) | \
     118             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_2048) | \
     119             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_3072) | \
     120             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_4096) | \
     121             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_6144) | \
     122             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192))
     123             : 
                       /* Module-wide option values and their compiled-in defaults. Field suffixes
                        * (_us, _ms, _sec) name the unit of each timeout or period.
                        */
     124             : static struct spdk_bdev_nvme_opts g_opts = {
     125             :         .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
     126             :         .timeout_us = 0,
     127             :         .timeout_admin_us = 0,
     128             :         .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
     129             :         .transport_retry_count = 4,
     130             :         .arbitration_burst = 0,
     131             :         .low_priority_weight = 0,
     132             :         .medium_priority_weight = 0,
     133             :         .high_priority_weight = 0,
     134             :         .nvme_adminq_poll_period_us = 10000ULL,
     135             :         .nvme_ioq_poll_period_us = 0,
     136             :         .io_queue_requests = 0,
     137             :         .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
     138             :         .bdev_retry_count = 3,
     139             :         .transport_ack_timeout = 0,
     140             :         .ctrlr_loss_timeout_sec = 0,
     141             :         .reconnect_delay_sec = 0,
     142             :         .fast_io_fail_timeout_sec = 0,
     143             :         .disable_auto_failback = false,
     144             :         .generate_uuids = false,
     145             :         .transport_tos = 0,
     146             :         .nvme_error_stat = false,
     147             :         .io_path_stat = false,
     148             :         .allow_accel_sequence = false,
     149             :         .dhchap_digests = BDEV_NVME_DEFAULT_DIGESTS,
     150             :         .dhchap_dhgroups = BDEV_NVME_DEFAULT_DHGROUPS,
     151             : };
     152             : 
                       /* Hotplug poll period limits. The default initializes
                        * g_nvme_hotplug_poll_period_us, so it is in microseconds; the maximum is
                        * presumably in the same unit - confirm.
                        */
     153             : #define NVME_HOTPLUG_POLL_PERIOD_MAX                    10000000ULL
     154             : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT                100000ULL
     155             : 
                       /* Hotplug state. Only g_bdev_nvme_init_thread has external linkage; the users
                        * of these variables are outside this part of the file.
                        */
     156             : static int g_hot_insert_nvme_controller_index = 0;
     157             : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
     158             : static bool g_nvme_hotplug_enabled = false;
     159             : struct spdk_thread *g_bdev_nvme_init_thread;
     160             : static struct spdk_poller *g_hotplug_poller;
     161             : static struct spdk_poller *g_hotplug_probe_poller;
     162             : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
     163             : 
                       /* Forward declarations of static functions that are used before their
                        * definitions later in the file.
                        */
     164             : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
     165             :                 struct nvme_async_probe_ctx *ctx);
     166             : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
     167             :                 struct nvme_async_probe_ctx *ctx);
                       /* Installed as nvme_if.module_init / nvme_if.module_fini. */
     168             : static int bdev_nvme_library_init(void);
     169             : static void bdev_nvme_library_fini(void);
     170             : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
     171             :                                       struct spdk_bdev_io *bdev_io);
     172             : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
     173             :                                      struct spdk_bdev_io *bdev_io);
                       /* Per-command submit helpers; each takes the per-I/O context as its first argument. */
     174             : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     175             :                            void *md, uint64_t lba_count, uint64_t lba,
     176             :                            uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     177             :                            struct spdk_accel_sequence *seq);
     178             : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     179             :                                  void *md, uint64_t lba_count, uint64_t lba);
     180             : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     181             :                             void *md, uint64_t lba_count, uint64_t lba,
     182             :                             uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     183             :                             struct spdk_accel_sequence *seq,
     184             :                             union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13);
     185             : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     186             :                                   void *md, uint64_t lba_count,
     187             :                                   uint64_t zslba, uint32_t flags);
     188             : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     189             :                               void *md, uint64_t lba_count, uint64_t lba,
     190             :                               uint32_t flags);
     191             : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
     192             :                 struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
     193             :                 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
     194             :                 uint32_t flags);
     195             : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
     196             :                                    uint32_t num_zones, struct spdk_bdev_zone_info *info);
     197             : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
     198             :                                      enum spdk_bdev_zone_action action);
     199             : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
     200             :                                      struct nvme_bdev_io *bio,
     201             :                                      struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
     202             : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     203             :                                  void *buf, size_t nbytes);
     204             : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     205             :                                     void *buf, size_t nbytes, void *md_buf, size_t md_len);
     206             : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     207             :                                      struct iovec *iov, int iovcnt, size_t nbytes,
     208             :                                      void *md_buf, size_t md_len);
     209             : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
     210             :                             struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
     211             : static void bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio);
     212             : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     213             : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     214             : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
     215             : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
     216             : 
                       /* Allocation and release of struct nvme_ns; nvme_ns_free() is used by
                        * _nvme_ctrlr_delete() for namespaces still left in the tree.
                        */
     217             : static struct nvme_ns *nvme_ns_alloc(void);
     218             : static void nvme_ns_free(struct nvme_ns *ns);
     219             : 
                       /* Three-way comparison of two namespaces by their id field.
                        * Returns -1 if ns1->id < ns2->id, 1 if ns1->id > ns2->id, and 0 if equal.
                        * Used as the ordering function of the nvme_ns_tree generated below.
                        */
     220             : static int
     221         173 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
     222             : {
     223         173 :         return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
     224             : }
     225             : 
                       /* Generate the static red-black tree functions for nvme_ns_tree, keyed via nvme_ns_cmp. */
     226         897 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
     227             : 
                       /* Return the spdk_nvme_qpair behind a controller I/O channel.
                        * ctrlr_io_ch must not be NULL (asserted). The channel context's qpair
                        * pointer is dereferenced without a NULL check.
                        */
     228             : struct spdk_nvme_qpair *
     229           1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
     230             : {
     231             :         struct nvme_ctrlr_channel *ctrlr_ch;
     232             : 
     233           1 :         assert(ctrlr_io_ch != NULL);
     234             : 
     235           1 :         ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
     236             : 
     237           1 :         return ctrlr_ch->qpair->qpair;
     238             : }
     239             : 
                       /* Report the size of the per-I/O context, struct nvme_bdev_io.
                        * Installed as nvme_if.get_ctx_size.
                        */
     240             : static int
     241           0 : bdev_nvme_get_ctx_size(void)
     242             : {
     243           0 :         return sizeof(struct nvme_bdev_io);
     244             : }
     245             : 
                       /* Module descriptor of the "nvme" bdev module, registered right below.
                        * async_fini is set: completion of module fini is signalled later via
                        * spdk_bdev_module_fini_done(), called from _nvme_ctrlr_delete().
                        */
     246             : static struct spdk_bdev_module nvme_if = {
     247             :         .name = "nvme",
     248             :         .async_fini = true,
     249             :         .module_init = bdev_nvme_library_init,
     250             :         .module_fini = bdev_nvme_library_fini,
     251             :         .config_json = bdev_nvme_config_json,
     252             :         .get_ctx_size = bdev_nvme_get_ctx_size,
     253             : 
     254             : };
     255           1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
     256             : 
                       /* g_nvme_bdev_ctrlrs: list of all nvme_bdev_ctrlr objects.
                        * g_bdev_nvme_mutex: statically initialized default mutex; the functions
                        * below hold it while walking or modifying the controller lists.
                        * g_bdev_nvme_module_finish: checked by _nvme_ctrlr_delete() to decide
                        * whether to complete module fini once the list is empty.
                        */
     257             : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
     258             : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
     259             : bool g_bdev_nvme_module_finish;
     260             : 
                       /* Find the nvme_bdev_ctrlr whose name equals `name` exactly.
                        * Returns the match, or NULL when the loop finishes without one.
                        * `name` is passed straight to strcmp(), so it must not be NULL.
                        * This function does not lock g_bdev_nvme_mutex itself;
                        * nvme_ctrlr_get_by_name() calls it with the mutex held.
                        */
     261             : struct nvme_bdev_ctrlr *
     262         270 : nvme_bdev_ctrlr_get_by_name(const char *name)
     263             : {
     264             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     265             : 
     266         270 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     267         148 :                 if (strcmp(name, nbdev_ctrlr->name) == 0) {
     268         148 :                         break;
     269             :                 }
     270             :         }
     271             : 
     272         270 :         return nbdev_ctrlr;
     273             : }
     274             : 
                       /* Find the nvme_ctrlr in nbdev_ctrlr->ctrlrs whose active path transport ID
                        * compares equal to `trid` and whose controller options carry the same
                        * host NQN as `hostnqn`.
                        * Returns the match, or NULL when none is found. No lock is taken here;
                        * nvme_ctrlr_get() calls this with g_bdev_nvme_mutex held.
                        */
     275             : static struct nvme_ctrlr *
     276          58 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     277             :                           const struct spdk_nvme_transport_id *trid, const char *hostnqn)
     278             : {
     279             :         const struct spdk_nvme_ctrlr_opts *opts;
     280             :         struct nvme_ctrlr *nvme_ctrlr;
     281             : 
     282          99 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     283          74 :                 opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
     284          74 :                 if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0 &&
     285          33 :                     strcmp(hostnqn, opts->hostnqn) == 0) {
     286          33 :                         break;
     287             :                 }
     288             :         }
     289             : 
     290          58 :         return nvme_ctrlr;
     291             : }
     292             : 
                       /* Find the nvme_ctrlr in nbdev_ctrlr->ctrlrs whose controller data reports
                        * the given controller ID (cntlid).
                        * Returns the match, or NULL when none is found. No lock is taken here.
                        */
     293             : struct nvme_ctrlr *
     294           0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     295             :                                 uint16_t cntlid)
     296             : {
     297             :         struct nvme_ctrlr *nvme_ctrlr;
     298             :         const struct spdk_nvme_ctrlr_data *cdata;
     299             : 
     300           0 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     301           0 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
     302           0 :                 if (cdata->cntlid == cntlid) {
     303           0 :                         break;
     304             :                 }
     305             :         }
     306             : 
     307           0 :         return nvme_ctrlr;
     308             : }
     309             : 
                       /* Find the nvme_bdev in nbdev_ctrlr->bdevs with the given namespace ID.
                        * The list is walked with g_bdev_nvme_mutex held; the mutex is released
                        * before returning, so the result is used by the caller without it.
                        * Returns the match, or NULL when none is found.
                        */
     310             : static struct nvme_bdev *
     311          72 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
     312             : {
     313             :         struct nvme_bdev *bdev;
     314             : 
     315          72 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     316         106 :         TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
     317          68 :                 if (bdev->nsid == nsid) {
     318          34 :                         break;
     319             :                 }
     320             :         }
     321          72 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     322             : 
     323          72 :         return bdev;
     324             : }
     325             : 
                       /* Look up a namespace of nvme_ctrlr by namespace ID in its nvme_ns_tree.
                        * nsid must be greater than 0 (asserted).
                        * Returns the result of RB_FIND on the tree.
                        */
     326             : struct nvme_ns *
     327         140 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
     328             : {
                               /* Stack-allocated search key: only the id field is set, which is the
                                * only field nvme_ns_cmp() reads.
                                */
     329         140 :         struct nvme_ns ns;
     330             : 
     331         140 :         assert(nsid > 0);
     332             : 
     333         140 :         ns.id = nsid;
     334         140 :         return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
     335             : }
     336             : 
                       /* Return the minimum element of the controller's namespace tree, which is
                        * ordered by namespace id through nvme_ns_cmp().
                        */
     337             : struct nvme_ns *
     338         152 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
     339             : {
     340         152 :         return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
     341             : }
     342             : 
                       /* Return the tree successor of `ns` in the controller's namespace tree.
                        * Returns NULL when `ns` is NULL; otherwise returns the result of RB_NEXT.
                        */
     343             : struct nvme_ns *
     344          63 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
     345             : {
     346          63 :         if (ns == NULL) {
     347           0 :                 return NULL;
     348             :         }
     349             : 
     350          63 :         return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     351             : }
     352             : 
                       /* Search every nvme_bdev_ctrlr in g_nvme_bdev_ctrlrs for a controller
                        * matching `trid` and `hostnqn` (see nvme_bdev_ctrlr_get_ctrlr()).
                        * The search runs with g_bdev_nvme_mutex held; the mutex is released before
                        * returning. Returns the first match, or NULL when none is found.
                        */
     353             : static struct nvme_ctrlr *
     354          51 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
     355             : {
     356             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
     357          51 :         struct nvme_ctrlr       *nvme_ctrlr = NULL;
     358             : 
     359          51 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     360          70 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     361          19 :                 nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid, hostnqn);
     362          19 :                 if (nvme_ctrlr != NULL) {
     363           0 :                         break;
     364             :                 }
     365             :         }
     366          51 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     367             : 
     368          51 :         return nvme_ctrlr;
     369             : }
     370             : 
                       /* Return the first nvme_ctrlr of the nvme_bdev_ctrlr named `name`.
                        * Returns NULL when `name` is NULL or no nvme_bdev_ctrlr has that name.
                        * The lookup runs with g_bdev_nvme_mutex held; the mutex is released before
                        * returning.
                        */
     371             : struct nvme_ctrlr *
     372          71 : nvme_ctrlr_get_by_name(const char *name)
     373             : {
     374             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     375          71 :         struct nvme_ctrlr *nvme_ctrlr = NULL;
     376             : 
     377          71 :         if (name == NULL) {
     378           0 :                 return NULL;
     379             :         }
     380             : 
     381          71 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     382          71 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
     383          71 :         if (nbdev_ctrlr != NULL) {
     384          40 :                 nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
     385             :         }
     386          71 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     387             : 
     388          71 :         return nvme_ctrlr;
     389             : }
     390             : 
                       /* Call fn(nbdev_ctrlr, ctx) for every entry of g_nvme_bdev_ctrlrs.
                        * The callback runs with g_bdev_nvme_mutex held. That mutex is initialized
                        * with PTHREAD_MUTEX_INITIALIZER (a default mutex), so fn must not call
                        * anything that locks it again.
                        */
     391             : void
     392           0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
     393             : {
     394             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     395             : 
     396           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     397           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     398           0 :                 fn(nbdev_ctrlr, ctx);
     399             :         }
     400           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     401           0 : }
     402             : 
                       /* Write the fields of a transport ID to `w` as named JSON strings:
                        * "trtype", "adrfam", "traddr", "trsvcid" and "subnqn".
                        * trtype/adrfam are written only when a string for the value exists; the
                        * other three only when the corresponding character array is non-empty.
                        * The caller is responsible for the enclosing JSON object.
                        */
     403             : void
     404           0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
     405             : {
     406             :         const char *trtype_str;
     407             :         const char *adrfam_str;
     408             : 
     409           0 :         trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
     410           0 :         if (trtype_str) {
     411           0 :                 spdk_json_write_named_string(w, "trtype", trtype_str);
     412             :         }
     413             : 
     414           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
     415           0 :         if (adrfam_str) {
     416           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
     417             :         }
     418             : 
     419           0 :         if (trid->traddr[0] != '\0') {
     420           0 :                 spdk_json_write_named_string(w, "traddr", trid->traddr);
     421             :         }
     422             : 
     423           0 :         if (trid->trsvcid[0] != '\0') {
     424           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
     425             :         }
     426             : 
     427           0 :         if (trid->subnqn[0] != '\0') {
     428           0 :                 spdk_json_write_named_string(w, "subnqn", trid->subnqn);
     429             :         }
     430           0 : }
     431             : 
                       /* Unlink nvme_ctrlr from nbdev_ctrlr->ctrlrs. If that leaves the list empty,
                        * also unlink nbdev_ctrlr from g_nvme_bdev_ctrlrs and free its name and the
                        * structure itself. Both unlink steps happen under g_bdev_nvme_mutex; the
                        * frees happen after it is released.
                        */
     432             : static void
     433          59 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     434             :                        struct nvme_ctrlr *nvme_ctrlr)
     435             : {
     436             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
     437          59 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     438             : 
     439          59 :         TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
                               /* Other controllers still belong to this nbdev_ctrlr: keep it alive. */
     440          59 :         if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
     441          15 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
     442             : 
     443          15 :                 return;
     444             :         }
     445          44 :         TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
     446             : 
     447          44 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     448             : 
                               /* All bdevs must already be gone when the last controller is deleted. */
     449          44 :         assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));
     450             : 
     451          44 :         free(nbdev_ctrlr->name);
     452          44 :         free(nbdev_ctrlr);
     453             : }
     454             : 
                       /* Release everything owned by nvme_ctrlr and free it: ANA buffers, the Opal
                        * device if present, its membership in the nvme_bdev_ctrlr, remaining
                        * namespaces, path IDs, its mutex and its keyring key references.
                        * Afterwards, if module fini has been requested and no nvme_bdev_ctrlr is
                        * left, unregister the g_nvme_bdev_ctrlrs io_device and report fini done.
                        */
     455             : static void
     456          60 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     457             : {
     458             :         struct nvme_path_id *path_id, *tmp_path;
     459             :         struct nvme_ns *ns, *tmp_ns;
     460             : 
     461          60 :         free(nvme_ctrlr->copied_ana_desc);
     462          60 :         spdk_free(nvme_ctrlr->ana_log_page);
     463             : 
     464          60 :         if (nvme_ctrlr->opal_dev) {
     465           0 :                 spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
     466           0 :                 nvme_ctrlr->opal_dev = NULL;
     467             :         }
     468             : 
     469          60 :         if (nvme_ctrlr->nbdev_ctrlr) {
     470          59 :                 nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
     471             :         }
     472             : 
                               /* The _SAFE iteration variants are used because each entry is removed
                                * and freed inside the loop body.
                                */
     473          60 :         RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
     474           0 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     475           0 :                 nvme_ns_free(ns);
     476             :         }
     477             : 
     478         120 :         TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
     479          60 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
     480          60 :                 free(path_id);
     481             :         }
     482             : 
     483          60 :         pthread_mutex_destroy(&nvme_ctrlr->mutex);
     484          60 :         spdk_keyring_put_key(nvme_ctrlr->psk);
     485          60 :         spdk_keyring_put_key(nvme_ctrlr->dhchap_key);
     486          60 :         spdk_keyring_put_key(nvme_ctrlr->dhchap_ctrlr_key);
     487          60 :         free(nvme_ctrlr);
     488             : 
                               /* nvme_ctrlr is freed at this point; the check below reads globals only.
                                * The mutex is dropped before calling into the io_device/bdev layers.
                                */
     489          60 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     490          60 :         if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
     491           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
     492           0 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
     493           0 :                 spdk_bdev_module_fini_done();
     494           0 :                 return;
     495             :         }
     496          60 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     497             : }
     498             : 
                       /* Poller callback (arg is the struct nvme_ctrlr) that advances the
                        * asynchronous detach started by nvme_ctrlr_delete().
                        * While spdk_nvme_detach_poll_async() returns -EAGAIN the poller stays
                        * registered; on any other result it unregisters itself and deletes the
                        * controller. Always returns SPDK_POLLER_BUSY.
                        */
     499             : static int
     500          60 : nvme_detach_poller(void *arg)
     501             : {
     502          60 :         struct nvme_ctrlr *nvme_ctrlr = arg;
     503             :         int rc;
     504             : 
     505          60 :         rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
     506          60 :         if (rc != -EAGAIN) {
     507          60 :                 spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     508          60 :                 _nvme_ctrlr_delete(nvme_ctrlr);
     509             :         }
     510             : 
     511          60 :         return SPDK_POLLER_BUSY;
     512             : }
     513             : 
                       /* Start tearing down nvme_ctrlr: stop its reconnect-delay and adminq pollers,
                        * register nvme_detach_poller and begin an asynchronous detach of the
                        * underlying NVMe controller. The final cleanup (_nvme_ctrlr_delete) runs
                        * from the poller once the detach finishes.
                        * If registering the poller or starting the detach fails, the controller is
                        * deleted immediately without detaching the device.
                        */
     514             : static void
     515          60 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     516             : {
     517             :         int rc;
     518             : 
     519          60 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
     520             : 
     521             :         /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
     522          60 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
     523             : 
     524             :         /* If we got here, the reset/detach poller cannot be active */
     525          60 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
                               /* NOTE(review): the period argument 1000 is presumably in microseconds - confirm
                                * against the SPDK_POLLER_REGISTER documentation.
                                */
     526          60 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
     527             :                                           nvme_ctrlr, 1000);
     528          60 :         if (nvme_ctrlr->reset_detach_poller == NULL) {
     529           0 :                 SPDK_ERRLOG("Failed to register detach poller\n");
     530           0 :                 goto error;
     531             :         }
     532             : 
     533          60 :         rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
     534          60 :         if (rc != 0) {
     535           0 :                 SPDK_ERRLOG("Failed to detach the NVMe controller\n");
     536           0 :                 goto error;
     537             :         }
     538             : 
     539          60 :         return;
     540           0 : error:
     541             :         /* We don't have a good way to handle errors here, so just do what we can and delete the
     542             :          * controller without detaching the underlying NVMe device.
     543             :          */
     544           0 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     545           0 :         _nvme_ctrlr_delete(nvme_ctrlr);
     546             : }
     547             : 
     548             : static void
     549          59 : nvme_ctrlr_unregister_cb(void *io_device)
     550             : {
     551          59 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
     552             : 
     553          59 :         nvme_ctrlr_delete(nvme_ctrlr);
     554          59 : }
     555             : 
     556             : static void
     557          59 : nvme_ctrlr_unregister(void *ctx)
     558             : {
     559          59 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
     560             : 
     561          59 :         spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
     562          59 : }
     563             : 
     564             : static bool
     565         220 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
     566             : {
     567         220 :         if (!nvme_ctrlr->destruct) {
     568         106 :                 return false;
     569             :         }
     570             : 
     571         114 :         if (nvme_ctrlr->ref > 0) {
     572          55 :                 return false;
     573             :         }
     574             : 
     575          59 :         if (nvme_ctrlr->resetting) {
     576           0 :                 return false;
     577             :         }
     578             : 
     579          59 :         if (nvme_ctrlr->ana_log_page_updating) {
     580           0 :                 return false;
     581             :         }
     582             : 
     583          59 :         if (nvme_ctrlr->io_path_cache_clearing) {
     584           0 :                 return false;
     585             :         }
     586             : 
     587          59 :         return true;
     588             : }
     589             : 
     590             : static void
     591         164 : nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
     592             : {
     593         164 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
     594             :         SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
     595             : 
     596         164 :         assert(nvme_ctrlr->ref > 0);
     597         164 :         nvme_ctrlr->ref--;
     598             : 
     599         164 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
     600         105 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
     601         105 :                 return;
     602             :         }
     603             : 
     604          59 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
     605             : 
     606          59 :         spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
     607             : }
     608             : 
     609             : static void
     610         161 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
     611             : {
     612         161 :         nbdev_ch->current_io_path = NULL;
     613         161 :         nbdev_ch->rr_counter = 0;
     614         161 : }
     615             : 
     616             : static struct nvme_io_path *
     617           8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     618             : {
     619             :         struct nvme_io_path *io_path;
     620             : 
     621          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
     622          15 :                 if (io_path->nvme_ns == nvme_ns) {
     623           7 :                         break;
     624             :                 }
     625             :         }
     626             : 
     627           8 :         return io_path;
     628             : }
     629             : 
     630             : static struct nvme_io_path *
     631          35 : nvme_io_path_alloc(void)
     632             : {
     633             :         struct nvme_io_path *io_path;
     634             : 
     635          35 :         io_path = calloc(1, sizeof(*io_path));
     636          35 :         if (io_path == NULL) {
     637           0 :                 SPDK_ERRLOG("Failed to alloc io_path.\n");
     638           0 :                 return NULL;
     639             :         }
     640             : 
     641          35 :         if (g_opts.io_path_stat) {
     642           0 :                 io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
     643           0 :                 if (io_path->stat == NULL) {
     644           0 :                         free(io_path);
     645           0 :                         SPDK_ERRLOG("Failed to alloc io_path stat.\n");
     646           0 :                         return NULL;
     647             :                 }
     648           0 :                 spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
     649             :         }
     650             : 
     651          35 :         return io_path;
     652             : }
     653             : 
     654             : static void
     655          35 : nvme_io_path_free(struct nvme_io_path *io_path)
     656             : {
     657          35 :         free(io_path->stat);
     658          35 :         free(io_path);
     659          35 : }
     660             : 
     661             : static int
     662          35 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     663             : {
     664             :         struct nvme_io_path *io_path;
     665             :         struct spdk_io_channel *ch;
     666             :         struct nvme_ctrlr_channel *ctrlr_ch;
     667             :         struct nvme_qpair *nvme_qpair;
     668             : 
     669          35 :         io_path = nvme_io_path_alloc();
     670          35 :         if (io_path == NULL) {
     671           0 :                 return -ENOMEM;
     672             :         }
     673             : 
     674          35 :         io_path->nvme_ns = nvme_ns;
     675             : 
     676          35 :         ch = spdk_get_io_channel(nvme_ns->ctrlr);
     677          35 :         if (ch == NULL) {
     678           0 :                 nvme_io_path_free(io_path);
     679           0 :                 SPDK_ERRLOG("Failed to alloc io_channel.\n");
     680           0 :                 return -ENOMEM;
     681             :         }
     682             : 
     683          35 :         ctrlr_ch = spdk_io_channel_get_ctx(ch);
     684             : 
     685          35 :         nvme_qpair = ctrlr_ch->qpair;
     686          35 :         assert(nvme_qpair != NULL);
     687             : 
     688          35 :         io_path->qpair = nvme_qpair;
     689          35 :         TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);
     690             : 
     691          35 :         io_path->nbdev_ch = nbdev_ch;
     692          35 :         STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);
     693             : 
     694          35 :         bdev_nvme_clear_current_io_path(nbdev_ch);
     695             : 
     696          35 :         return 0;
     697             : }
     698             : 
     699             : static void
     700          35 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
     701             :                               struct nvme_io_path *io_path)
     702             : {
     703             :         struct nvme_bdev_io *bio;
     704             : 
     705          36 :         TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
     706           1 :                 if (bio->io_path == io_path) {
     707           1 :                         bio->io_path = NULL;
     708             :                 }
     709             :         }
     710          35 : }
     711             : 
     712             : static void
     713          35 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
     714             : {
     715             :         struct spdk_io_channel *ch;
     716             :         struct nvme_qpair *nvme_qpair;
     717             :         struct nvme_ctrlr_channel *ctrlr_ch;
     718             :         struct nvme_bdev *nbdev;
     719             : 
     720          35 :         nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));
     721             : 
     722             :         /* Add the statistics to nvme_ns before this path is destroyed. */
     723          35 :         pthread_mutex_lock(&nbdev->mutex);
     724          35 :         if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
     725           0 :                 spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
     726             :         }
     727          35 :         pthread_mutex_unlock(&nbdev->mutex);
     728             : 
     729          35 :         bdev_nvme_clear_current_io_path(nbdev_ch);
     730          35 :         bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);
     731             : 
     732          35 :         STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
     733          35 :         io_path->nbdev_ch = NULL;
     734             : 
     735          35 :         nvme_qpair = io_path->qpair;
     736          35 :         assert(nvme_qpair != NULL);
     737             : 
     738          35 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
     739          35 :         assert(ctrlr_ch != NULL);
     740             : 
     741          35 :         ch = spdk_io_channel_from_ctx(ctrlr_ch);
     742          35 :         spdk_put_io_channel(ch);
     743             : 
     744             :         /* After an io_path is removed, I/Os submitted to it may complete and update statistics
     745             :          * of the io_path. To avoid heap-use-after-free error from this case, do not free the
     746             :          * io_path here but free the io_path when the associated qpair is freed. It is ensured
     747             :          * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
     748             :          */
     749          35 : }
     750             : 
     751             : static void
     752          22 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
     753             : {
     754             :         struct nvme_io_path *io_path, *tmp_io_path;
     755             : 
     756          55 :         STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
     757          33 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
     758             :         }
     759          22 : }
     760             : 
     761             : static int
     762          22 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
     763             : {
     764          22 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf;
     765          22 :         struct nvme_bdev *nbdev = io_device;
     766             :         struct nvme_ns *nvme_ns;
     767             :         int rc;
     768             : 
     769          22 :         STAILQ_INIT(&nbdev_ch->io_path_list);
     770          22 :         TAILQ_INIT(&nbdev_ch->retry_io_list);
     771             : 
     772          22 :         pthread_mutex_lock(&nbdev->mutex);
     773             : 
     774          22 :         nbdev_ch->mp_policy = nbdev->mp_policy;
     775          22 :         nbdev_ch->mp_selector = nbdev->mp_selector;
     776          22 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
     777             : 
     778          55 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
     779          33 :                 rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
     780          33 :                 if (rc != 0) {
     781           0 :                         pthread_mutex_unlock(&nbdev->mutex);
     782             : 
     783           0 :                         _bdev_nvme_delete_io_paths(nbdev_ch);
     784           0 :                         return rc;
     785             :                 }
     786             :         }
     787          22 :         pthread_mutex_unlock(&nbdev->mutex);
     788             : 
     789          22 :         return 0;
     790             : }
     791             : 
     792             : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
     793             :  * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
     794             :  */
     795             : static inline void
     796          47 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
     797             :                         const struct spdk_nvme_cpl *cpl)
     798             : {
     799          47 :         spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
     800             :                           (uintptr_t)bdev_io);
     801          47 :         if (cpl) {
     802          29 :                 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
     803             :         } else {
     804          18 :                 spdk_bdev_io_complete(bdev_io, status);
     805             :         }
     806          47 : }
     807             : 
     808             : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
     809             : 
     810             : static void
     811          22 : bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
     812             : {
     813          22 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf;
     814             : 
     815          22 :         bdev_nvme_abort_retry_ios(nbdev_ch);
     816          22 :         _bdev_nvme_delete_io_paths(nbdev_ch);
     817          22 : }
     818             : 
     819             : static inline bool
     820          58 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
     821             : {
     822          58 :         switch (io_type) {
     823           5 :         case SPDK_BDEV_IO_TYPE_RESET:
     824             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
     825             :         case SPDK_BDEV_IO_TYPE_ABORT:
     826           5 :                 return true;
     827          53 :         default:
     828          53 :                 break;
     829             :         }
     830             : 
     831          53 :         return false;
     832             : }
     833             : 
     834             : static inline bool
     835          90 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
     836             : {
     837          90 :         if (spdk_unlikely(nvme_ns->ana_state_updating)) {
     838           1 :                 return false;
     839             :         }
     840             : 
     841          89 :         if (spdk_unlikely(nvme_ns->ns == NULL)) {
     842           0 :                 return false;
     843             :         }
     844             : 
     845          89 :         return true;
     846             : }
     847             : 
     848             : static inline bool
     849          78 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
     850             : {
     851          78 :         if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
     852           1 :                 return false;
     853             :         }
     854             : 
     855          77 :         switch (nvme_ns->ana_state) {
     856          68 :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
     857             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
     858          68 :                 return true;
     859           9 :         default:
     860           9 :                 break;
     861             :         }
     862             : 
     863           9 :         return false;
     864             : }
     865             : 
     866             : static inline bool
     867         117 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
     868             : {
     869         117 :         if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
     870          20 :                 return false;
     871             :         }
     872             : 
     873          97 :         if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
     874             :                           SPDK_NVME_QPAIR_FAILURE_NONE)) {
     875           2 :                 return false;
     876             :         }
     877             : 
     878          95 :         if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
     879           0 :                 return false;
     880             :         }
     881             : 
     882          95 :         return true;
     883             : }
     884             : 
     885             : static inline bool
     886          92 : nvme_io_path_is_available(struct nvme_io_path *io_path)
     887             : {
     888          92 :         if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
     889          14 :                 return false;
     890             :         }
     891             : 
     892          78 :         if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
     893          10 :                 return false;
     894             :         }
     895             : 
     896          68 :         return true;
     897             : }
     898             : 
     899             : static inline bool
     900           8 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
     901             : {
     902           8 :         if (nvme_ctrlr->destruct) {
     903           0 :                 return true;
     904             :         }
     905             : 
     906           8 :         if (nvme_ctrlr->fast_io_fail_timedout) {
     907           2 :                 return true;
     908             :         }
     909             : 
     910           6 :         if (nvme_ctrlr->resetting) {
     911           4 :                 if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
     912           4 :                         return false;
     913             :                 } else {
     914           0 :                         return true;
     915             :                 }
     916             :         }
     917             : 
     918           2 :         if (nvme_ctrlr->reconnect_is_delayed) {
     919           2 :                 return false;
     920             :         }
     921             : 
     922           0 :         if (nvme_ctrlr->disabled) {
     923           0 :                 return true;
     924             :         }
     925             : 
     926           0 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
     927           0 :                 return true;
     928             :         } else {
     929           0 :                 return false;
     930             :         }
     931             : }
     932             : 
     933             : static bool
     934          20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
     935             : {
     936          20 :         if (nvme_ctrlr->destruct) {
     937           0 :                 return false;
     938             :         }
     939             : 
     940          20 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
     941           3 :                 return false;
     942             :         }
     943             : 
     944          17 :         if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
     945           1 :                 return false;
     946             :         }
     947             : 
     948          16 :         if (nvme_ctrlr->disabled) {
     949           0 :                 return false;
     950             :         }
     951             : 
     952          16 :         return true;
     953             : }
     954             : 
     955             : /* Simulate circular linked list. */
     956             : static inline struct nvme_io_path *
     957          87 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
     958             : {
     959             :         struct nvme_io_path *next_path;
     960             : 
     961          87 :         if (prev_path != NULL) {
     962          37 :                 next_path = STAILQ_NEXT(prev_path, stailq);
     963          37 :                 if (next_path != NULL) {
     964          14 :                         return next_path;
     965             :                 }
     966             :         }
     967             : 
     968          73 :         return STAILQ_FIRST(&nbdev_ch->io_path_list);
     969             : }
     970             : 
     971             : static struct nvme_io_path *
     972          57 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
     973             : {
     974          57 :         struct nvme_io_path *io_path, *start, *non_optimized = NULL;
     975             : 
     976          57 :         start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
     977             : 
     978          57 :         io_path = start;
     979             :         do {
     980          69 :                 if (spdk_likely(nvme_io_path_is_available(io_path))) {
     981          49 :                         switch (io_path->nvme_ns->ana_state) {
     982          39 :                         case SPDK_NVME_ANA_OPTIMIZED_STATE:
     983          39 :                                 nbdev_ch->current_io_path = io_path;
     984          39 :                                 return io_path;
     985          10 :                         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
     986          10 :                                 if (non_optimized == NULL) {
     987           7 :                                         non_optimized = io_path;
     988             :                                 }
     989          10 :                                 break;
     990           0 :                         default:
     991           0 :                                 assert(false);
     992             :                                 break;
     993             :                         }
     994             :                 }
     995          30 :                 io_path = nvme_io_path_get_next(nbdev_ch, io_path);
     996          30 :         } while (io_path != start);
     997             : 
     998          18 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
     999             :                 /* We come here only if there is no optimized path. Cache even non_optimized
    1000             :                  * path for load balance across multiple non_optimized paths.
    1001             :                  */
    1002           1 :                 nbdev_ch->current_io_path = non_optimized;
    1003             :         }
    1004             : 
    1005          18 :         return non_optimized;
    1006             : }
    1007             : 
    1008             : static struct nvme_io_path *
    1009           4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
    1010             : {
    1011             :         struct nvme_io_path *io_path;
    1012           4 :         struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
    1013           4 :         uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
    1014             :         uint32_t num_outstanding_reqs;
    1015             : 
    1016          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    1017          12 :                 if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
    1018             :                         /* The device is currently resetting. */
    1019           0 :                         continue;
    1020             :                 }
    1021             : 
    1022          12 :                 if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
    1023           0 :                         continue;
    1024             :                 }
    1025             : 
    1026          12 :                 num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
    1027          12 :                 switch (io_path->nvme_ns->ana_state) {
    1028           6 :                 case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1029           6 :                         if (num_outstanding_reqs < opt_min_qd) {
    1030           5 :                                 opt_min_qd = num_outstanding_reqs;
    1031           5 :                                 optimized = io_path;
    1032             :                         }
    1033           6 :                         break;
    1034           3 :                 case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1035           3 :                         if (num_outstanding_reqs < non_opt_min_qd) {
    1036           3 :                                 non_opt_min_qd = num_outstanding_reqs;
    1037           3 :                                 non_optimized = io_path;
    1038             :                         }
    1039           3 :                         break;
    1040           3 :                 default:
    1041           3 :                         break;
    1042             :                 }
    1043             :         }
    1044             : 
    1045             :         /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
    1046           4 :         if (optimized != NULL) {
    1047           3 :                 return optimized;
    1048             :         }
    1049             : 
    1050           1 :         return non_optimized;
    1051             : }
    1052             : 
    1053             : static inline struct nvme_io_path *
    1054          95 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
    1055             : {
    1056          95 :         if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
    1057          41 :                 if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
    1058          31 :                         return nbdev_ch->current_io_path;
    1059          10 :                 } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1060          10 :                         if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
    1061           3 :                                 return nbdev_ch->current_io_path;
    1062             :                         }
    1063           7 :                         nbdev_ch->rr_counter = 0;
    1064             :                 }
    1065             :         }
    1066             : 
    1067          61 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
    1068          14 :             nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1069          57 :                 return _bdev_nvme_find_io_path(nbdev_ch);
    1070             :         } else {
    1071           4 :                 return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
    1072             :         }
    1073             : }
    1074             : 
    1075             : /* Return true if there is any io_path whose qpair is active or ctrlr is not failed,
    1076             :  * or false otherwise.
    1077             :  *
    1078             :  * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
    1079             :  * is likely to be non-accessible now but may become accessible.
    1080             :  *
    1081             :  * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
    1082             :  * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
    1083             :  * when starting to reset it but it is set to failed when the reset failed. Hence, if
    1084             :  * a ctrlr is unfailed, it is likely that it works fine or is resetting.
    1085             :  */
    1086             : static bool
    1087          13 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
    1088             : {
    1089             :         struct nvme_io_path *io_path;
    1090             : 
    1091          15 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    1092          13 :                 if (io_path->nvme_ns->ana_transition_timedout) {
    1093           0 :                         continue;
    1094             :                 }
    1095             : 
    1096          13 :                 if (nvme_qpair_is_connected(io_path->qpair) ||
    1097           8 :                     !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
    1098          11 :                         return true;
    1099             :                 }
    1100             :         }
    1101             : 
    1102           2 :         return false;
    1103             : }
    1104             : 
    1105             : static void
    1106          14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    1107             : {
    1108          14 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    1109             :         struct spdk_io_channel *ch;
    1110             : 
    1111          14 :         if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
    1112           3 :                 _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    1113             :         } else {
    1114          11 :                 ch = spdk_io_channel_from_ctx(nbdev_ch);
    1115          11 :                 bdev_nvme_submit_request(ch, bdev_io);
    1116             :         }
    1117          14 : }
    1118             : 
    1119             : static int
    1120          14 : bdev_nvme_retry_ios(void *arg)
    1121             : {
    1122          14 :         struct nvme_bdev_channel *nbdev_ch = arg;
    1123             :         struct nvme_bdev_io *bio, *tmp_bio;
    1124             :         uint64_t now, delay_us;
    1125             : 
    1126          14 :         now = spdk_get_ticks();
    1127             : 
    1128          28 :         TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
    1129          15 :                 if (bio->retry_ticks > now) {
    1130           1 :                         break;
    1131             :                 }
    1132             : 
    1133          14 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1134             : 
    1135          14 :                 bdev_nvme_retry_io(nbdev_ch, spdk_bdev_io_from_ctx(bio));
    1136             :         }
    1137             : 
    1138          14 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1139             : 
    1140          14 :         bio = TAILQ_FIRST(&nbdev_ch->retry_io_list);
    1141          14 :         if (bio != NULL) {
    1142           4 :                 delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
    1143             : 
    1144           4 :                 nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1145             :                                             delay_us);
    1146             :         }
    1147             : 
    1148          14 :         return SPDK_POLLER_BUSY;
    1149             : }
    1150             : 
    1151             : static void
    1152          15 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1153             :                          struct nvme_bdev_io *bio, uint64_t delay_ms)
    1154             : {
    1155             :         struct nvme_bdev_io *tmp_bio;
    1156             : 
    1157          15 :         bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
    1158             : 
    1159          15 :         TAILQ_FOREACH_REVERSE(tmp_bio, &nbdev_ch->retry_io_list, retry_io_head, retry_link) {
    1160           1 :                 if (tmp_bio->retry_ticks <= bio->retry_ticks) {
    1161           1 :                         TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bio, bio,
    1162             :                                            retry_link);
    1163           1 :                         return;
    1164             :                 }
    1165             :         }
    1166             : 
    1167             :         /* No earlier I/Os were found. This I/O must be the new head. */
    1168          14 :         TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bio, retry_link);
    1169             : 
    1170          14 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1171             : 
    1172          14 :         nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1173             :                                     delay_ms * 1000ULL);
    1174             : }
    1175             : 
    1176             : static void
    1177          36 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
    1178             : {
    1179             :         struct nvme_bdev_io *bio, *tmp_bio;
    1180             : 
    1181          36 :         TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
    1182           0 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1183           0 :                 __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1184             :         }
    1185             : 
    1186          36 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1187          36 : }
    1188             : 
    1189             : static int
    1190           6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1191             :                          struct nvme_bdev_io *bio_to_abort)
    1192             : {
    1193             :         struct nvme_bdev_io *bio;
    1194             : 
    1195           6 :         TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
    1196           1 :                 if (bio == bio_to_abort) {
    1197           1 :                         TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1198           1 :                         __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1199           1 :                         return 0;
    1200             :                 }
    1201             :         }
    1202             : 
    1203           5 :         return -ENOENT;
    1204             : }
    1205             : 
    1206             : static void
    1207          12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
    1208             : {
    1209             :         struct nvme_bdev *nbdev;
    1210             :         uint16_t sct, sc;
    1211             : 
    1212          12 :         assert(spdk_nvme_cpl_is_error(cpl));
    1213             : 
    1214          12 :         nbdev = bdev_io->bdev->ctxt;
    1215             : 
    1216          12 :         if (nbdev->err_stat == NULL) {
    1217          12 :                 return;
    1218             :         }
    1219             : 
    1220           0 :         sct = cpl->status.sct;
    1221           0 :         sc = cpl->status.sc;
    1222             : 
    1223           0 :         pthread_mutex_lock(&nbdev->mutex);
    1224             : 
    1225           0 :         nbdev->err_stat->status_type[sct]++;
    1226           0 :         switch (sct) {
    1227           0 :         case SPDK_NVME_SCT_GENERIC:
    1228             :         case SPDK_NVME_SCT_COMMAND_SPECIFIC:
    1229             :         case SPDK_NVME_SCT_MEDIA_ERROR:
    1230             :         case SPDK_NVME_SCT_PATH:
    1231           0 :                 nbdev->err_stat->status[sct][sc]++;
    1232           0 :                 break;
    1233           0 :         default:
    1234           0 :                 break;
    1235             :         }
    1236             : 
    1237           0 :         pthread_mutex_unlock(&nbdev->mutex);
    1238             : }
    1239             : 
    1240             : static inline void
    1241          20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
    1242             : {
    1243          20 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1244          20 :         uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
    1245          20 :         uint32_t blocklen = bdev_io->bdev->blocklen;
    1246             :         struct spdk_bdev_io_stat *stat;
    1247             :         uint64_t tsc_diff;
    1248             : 
    1249          20 :         if (bio->io_path->stat == NULL) {
    1250          20 :                 return;
    1251             :         }
    1252             : 
    1253           0 :         tsc_diff = spdk_get_ticks() - bio->submit_tsc;
    1254           0 :         stat = bio->io_path->stat;
    1255             : 
    1256           0 :         switch (bdev_io->type) {
    1257           0 :         case SPDK_BDEV_IO_TYPE_READ:
    1258           0 :                 stat->bytes_read += num_blocks * blocklen;
    1259           0 :                 stat->num_read_ops++;
    1260           0 :                 stat->read_latency_ticks += tsc_diff;
    1261           0 :                 if (stat->max_read_latency_ticks < tsc_diff) {
    1262           0 :                         stat->max_read_latency_ticks = tsc_diff;
    1263             :                 }
    1264           0 :                 if (stat->min_read_latency_ticks > tsc_diff) {
    1265           0 :                         stat->min_read_latency_ticks = tsc_diff;
    1266             :                 }
    1267           0 :                 break;
    1268           0 :         case SPDK_BDEV_IO_TYPE_WRITE:
    1269           0 :                 stat->bytes_written += num_blocks * blocklen;
    1270           0 :                 stat->num_write_ops++;
    1271           0 :                 stat->write_latency_ticks += tsc_diff;
    1272           0 :                 if (stat->max_write_latency_ticks < tsc_diff) {
    1273           0 :                         stat->max_write_latency_ticks = tsc_diff;
    1274             :                 }
    1275           0 :                 if (stat->min_write_latency_ticks > tsc_diff) {
    1276           0 :                         stat->min_write_latency_ticks = tsc_diff;
    1277             :                 }
    1278           0 :                 break;
    1279           0 :         case SPDK_BDEV_IO_TYPE_UNMAP:
    1280           0 :                 stat->bytes_unmapped += num_blocks * blocklen;
    1281           0 :                 stat->num_unmap_ops++;
    1282           0 :                 stat->unmap_latency_ticks += tsc_diff;
    1283           0 :                 if (stat->max_unmap_latency_ticks < tsc_diff) {
    1284           0 :                         stat->max_unmap_latency_ticks = tsc_diff;
    1285             :                 }
    1286           0 :                 if (stat->min_unmap_latency_ticks > tsc_diff) {
    1287           0 :                         stat->min_unmap_latency_ticks = tsc_diff;
    1288             :                 }
    1289           0 :                 break;
    1290           0 :         case SPDK_BDEV_IO_TYPE_ZCOPY:
    1291             :                 /* Track the data in the start phase only */
    1292           0 :                 if (!bdev_io->u.bdev.zcopy.start) {
    1293           0 :                         break;
    1294             :                 }
    1295           0 :                 if (bdev_io->u.bdev.zcopy.populate) {
    1296           0 :                         stat->bytes_read += num_blocks * blocklen;
    1297           0 :                         stat->num_read_ops++;
    1298           0 :                         stat->read_latency_ticks += tsc_diff;
    1299           0 :                         if (stat->max_read_latency_ticks < tsc_diff) {
    1300           0 :                                 stat->max_read_latency_ticks = tsc_diff;
    1301             :                         }
    1302           0 :                         if (stat->min_read_latency_ticks > tsc_diff) {
    1303           0 :                                 stat->min_read_latency_ticks = tsc_diff;
    1304             :                         }
    1305             :                 } else {
    1306           0 :                         stat->bytes_written += num_blocks * blocklen;
    1307           0 :                         stat->num_write_ops++;
    1308           0 :                         stat->write_latency_ticks += tsc_diff;
    1309           0 :                         if (stat->max_write_latency_ticks < tsc_diff) {
    1310           0 :                                 stat->max_write_latency_ticks = tsc_diff;
    1311             :                         }
    1312           0 :                         if (stat->min_write_latency_ticks > tsc_diff) {
    1313           0 :                                 stat->min_write_latency_ticks = tsc_diff;
    1314             :                         }
    1315             :                 }
    1316           0 :                 break;
    1317           0 :         case SPDK_BDEV_IO_TYPE_COPY:
    1318           0 :                 stat->bytes_copied += num_blocks * blocklen;
    1319           0 :                 stat->num_copy_ops++;
    1320           0 :                 stat->copy_latency_ticks += tsc_diff;
    1321           0 :                 if (stat->max_copy_latency_ticks < tsc_diff) {
    1322           0 :                         stat->max_copy_latency_ticks = tsc_diff;
    1323             :                 }
    1324           0 :                 if (stat->min_copy_latency_ticks > tsc_diff) {
    1325           0 :                         stat->min_copy_latency_ticks = tsc_diff;
    1326             :                 }
    1327           0 :                 break;
    1328           0 :         default:
    1329           0 :                 break;
    1330             :         }
    1331             : }
    1332             : 
    1333             : static bool
    1334           7 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
    1335             :                          const struct spdk_nvme_cpl *cpl,
    1336             :                          struct nvme_bdev_channel *nbdev_ch,
    1337             :                          uint64_t *_delay_ms)
    1338             : {
    1339           7 :         struct nvme_io_path *io_path = bio->io_path;
    1340           7 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    1341             :         const struct spdk_nvme_ctrlr_data *cdata;
    1342             : 
    1343           7 :         if (spdk_nvme_cpl_is_path_error(cpl) ||
    1344           5 :             spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
    1345           4 :             !nvme_io_path_is_available(io_path) ||
    1346           4 :             !nvme_ctrlr_is_available(nvme_ctrlr)) {
    1347           3 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    1348           3 :                 bio->io_path = NULL;
    1349           3 :                 if (spdk_nvme_cpl_is_ana_error(cpl)) {
    1350           1 :                         if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
    1351           1 :                                 io_path->nvme_ns->ana_state_updating = true;
    1352             :                         }
    1353             :                 }
    1354           3 :                 if (!any_io_path_may_become_available(nbdev_ch)) {
    1355           0 :                         return false;
    1356             :                 }
    1357           3 :                 *_delay_ms = 0;
    1358             :         } else {
    1359           4 :                 bio->retry_count++;
    1360             : 
    1361           4 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    1362             : 
    1363           4 :                 if (cpl->status.crd != 0) {
    1364           1 :                         *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
    1365             :                 } else {
    1366           3 :                         *_delay_ms = 0;
    1367             :                 }
    1368             :         }
    1369             : 
    1370           7 :         return true;
    1371             : }
    1372             : 
    1373             : static inline void
    1374          32 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
    1375             :                                   const struct spdk_nvme_cpl *cpl)
    1376             : {
    1377          32 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1378             :         struct nvme_bdev_channel *nbdev_ch;
    1379          32 :         uint64_t delay_ms;
    1380             : 
    1381          32 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1382             : 
    1383          32 :         if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
    1384          20 :                 bdev_nvme_update_io_path_stat(bio);
    1385          20 :                 goto complete;
    1386             :         }
    1387             : 
    1388             :         /* Update error counts before deciding if retry is needed.
    1389             :          * Hence, error counts may be more than the number of I/O errors.
    1390             :          */
    1391          12 :         bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
    1392             : 
    1393          12 :         if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
    1394           8 :             (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
    1395           5 :                 goto complete;
    1396             :         }
    1397             : 
    1398             :         /* At this point we don't know whether the sequence was successfully executed or not, so we
    1399             :          * cannot retry the IO */
    1400           7 :         if (bdev_io->u.bdev.accel_sequence != NULL) {
    1401           0 :                 goto complete;
    1402             :         }
    1403             : 
    1404           7 :         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1405             : 
    1406           7 :         if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
    1407           7 :                 bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
    1408           7 :                 return;
    1409             :         }
    1410             : 
    1411          25 : complete:
    1412          25 :         bio->retry_count = 0;
    1413          25 :         bio->submit_tsc = 0;
    1414          25 :         bdev_io->u.bdev.accel_sequence = NULL;
    1415          25 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    1416             : }
    1417             : 
    1418             : static inline void
    1419          11 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
    1420             : {
    1421          11 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1422             :         struct nvme_bdev_channel *nbdev_ch;
    1423             :         enum spdk_bdev_io_status io_status;
    1424             : 
    1425          11 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1426             : 
    1427          11 :         switch (rc) {
    1428           1 :         case 0:
    1429           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1430           1 :                 break;
    1431           0 :         case -ENOMEM:
    1432           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1433           0 :                 break;
    1434          10 :         case -ENXIO:
    1435          10 :                 if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
    1436          10 :                         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1437             : 
    1438          10 :                         bdev_nvme_clear_current_io_path(nbdev_ch);
    1439          10 :                         bio->io_path = NULL;
    1440             : 
    1441          10 :                         if (any_io_path_may_become_available(nbdev_ch)) {
    1442           8 :                                 bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
    1443           8 :                                 return;
    1444             :                         }
    1445             :                 }
    1446             : 
    1447             :         /* fallthrough */
    1448             :         default:
    1449           2 :                 spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
    1450           2 :                 bdev_io->u.bdev.accel_sequence = NULL;
    1451           2 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1452           2 :                 break;
    1453             :         }
    1454             : 
    1455           3 :         bio->retry_count = 0;
    1456           3 :         bio->submit_tsc = 0;
    1457           3 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1458             : }
    1459             : 
    1460             : static inline void
    1461           4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
    1462             : {
    1463           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1464             :         enum spdk_bdev_io_status io_status;
    1465             : 
    1466           4 :         switch (rc) {
    1467           1 :         case 0:
    1468           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1469           1 :                 break;
    1470           0 :         case -ENOMEM:
    1471           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1472           0 :                 break;
    1473           3 :         case -ENXIO:
    1474             :         /* fallthrough */
    1475             :         default:
    1476           3 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1477           3 :                 break;
    1478             :         }
    1479             : 
    1480           4 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1481           4 : }
    1482             : 
    1483             : static void
    1484           3 : bdev_nvme_clear_io_path_caches_done(struct spdk_io_channel_iter *i, int status)
    1485             : {
    1486           3 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    1487             : 
    1488           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1489             : 
    1490           3 :         assert(nvme_ctrlr->io_path_cache_clearing == true);
    1491           3 :         nvme_ctrlr->io_path_cache_clearing = false;
    1492             : 
    1493           3 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    1494           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1495           3 :                 return;
    1496             :         }
    1497             : 
    1498           0 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1499             : 
    1500           0 :         nvme_ctrlr_unregister(nvme_ctrlr);
    1501             : }
    1502             : 
    1503             : static void
    1504         320 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
    1505             : {
    1506             :         struct nvme_io_path *io_path;
    1507             : 
    1508         459 :         TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
    1509         139 :                 if (io_path->nbdev_ch == NULL) {
    1510          64 :                         continue;
    1511             :                 }
    1512          75 :                 bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
    1513             :         }
    1514         320 : }
    1515             : 
    1516             : static void
    1517           1 : bdev_nvme_clear_io_path_cache(struct spdk_io_channel_iter *i)
    1518             : {
    1519           1 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    1520           1 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
    1521             : 
    1522           1 :         assert(ctrlr_ch->qpair != NULL);
    1523             : 
    1524           1 :         _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
    1525             : 
    1526           1 :         spdk_for_each_channel_continue(i, 0);
    1527           1 : }
    1528             : 
    1529             : static void
    1530           3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
    1531             : {
    1532           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1533           3 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    1534             :             nvme_ctrlr->io_path_cache_clearing) {
    1535           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1536           0 :                 return;
    1537             :         }
    1538             : 
    1539           3 :         nvme_ctrlr->io_path_cache_clearing = true;
    1540           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1541             : 
    1542           3 :         spdk_for_each_channel(nvme_ctrlr,
    1543             :                               bdev_nvme_clear_io_path_cache,
    1544             :                               NULL,
    1545             :                               bdev_nvme_clear_io_path_caches_done);
    1546             : }
    1547             : 
    1548             : static struct nvme_qpair *
    1549          99 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
    1550             : {
    1551             :         struct nvme_qpair *nvme_qpair;
    1552             : 
    1553         108 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1554         108 :                 if (nvme_qpair->qpair == qpair) {
    1555          99 :                         break;
    1556             :                 }
    1557             :         }
    1558             : 
    1559          99 :         return nvme_qpair;
    1560             : }
    1561             : 
    1562             : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
    1563             : 
    1564             : static void
    1565          99 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
    1566             : {
    1567          99 :         struct nvme_poll_group *group = poll_group_ctx;
    1568             :         struct nvme_qpair *nvme_qpair;
    1569             :         struct nvme_ctrlr_channel *ctrlr_ch;
    1570             :         int status;
    1571             : 
    1572          99 :         nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
    1573          99 :         if (nvme_qpair == NULL) {
    1574           0 :                 return;
    1575             :         }
    1576             : 
    1577          99 :         if (nvme_qpair->qpair != NULL) {
    1578          99 :                 spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
    1579          99 :                 nvme_qpair->qpair = NULL;
    1580             :         }
    1581             : 
    1582          99 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1583             : 
    1584          99 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
    1585             : 
    1586          99 :         if (ctrlr_ch != NULL) {
    1587          56 :                 if (ctrlr_ch->reset_iter != NULL) {
    1588             :                         /* We are in a full reset sequence. */
    1589          52 :                         if (ctrlr_ch->connect_poller != NULL) {
    1590             :                                 /* qpair was failed to connect. Abort the reset sequence. */
    1591           0 :                                 SPDK_DEBUGLOG(bdev_nvme, "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
    1592             :                                               qpair);
    1593           0 :                                 spdk_poller_unregister(&ctrlr_ch->connect_poller);
    1594           0 :                                 status = -1;
    1595             :                         } else {
    1596             :                                 /* qpair was completed to disconnect. Just move to the next ctrlr_channel. */
    1597          52 :                                 SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
    1598             :                                               qpair);
    1599          52 :                                 status = 0;
    1600             :                         }
    1601          52 :                         spdk_for_each_channel_continue(ctrlr_ch->reset_iter, status);
    1602          52 :                         ctrlr_ch->reset_iter = NULL;
    1603             :                 } else {
    1604             :                         /* qpair was disconnected unexpectedly. Reset controller for recovery. */
    1605           4 :                         SPDK_NOTICELOG("qpair %p was disconnected and freed. reset controller.\n", qpair);
    1606           4 :                         bdev_nvme_failover_ctrlr(nvme_qpair->ctrlr);
    1607             :                 }
    1608             :         } else {
    1609             :                 /* In this case, ctrlr_channel is already deleted. */
    1610          43 :                 SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed. delete nvme_qpair.\n", qpair);
    1611          43 :                 nvme_qpair_delete(nvme_qpair);
    1612             :         }
    1613             : }
    1614             : 
    1615             : static void
    1616           0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
    1617             : {
    1618             :         struct nvme_qpair *nvme_qpair;
    1619             : 
    1620           0 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1621           0 :                 if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
    1622           0 :                         continue;
    1623             :                 }
    1624             : 
    1625           0 :                 if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
    1626             :                     SPDK_NVME_QPAIR_FAILURE_NONE) {
    1627           0 :                         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1628             :                 }
    1629             :         }
    1630           0 : }
    1631             : 
    1632             : static int
    1633        1025 : bdev_nvme_poll(void *arg)
    1634             : {
    1635        1025 :         struct nvme_poll_group *group = arg;
    1636             :         int64_t num_completions;
    1637             : 
    1638        1025 :         if (group->collect_spin_stat && group->start_ticks == 0) {
    1639           0 :                 group->start_ticks = spdk_get_ticks();
    1640             :         }
    1641             : 
    1642        1025 :         num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
    1643             :                           bdev_nvme_disconnected_qpair_cb);
    1644        1025 :         if (group->collect_spin_stat) {
    1645           0 :                 if (num_completions > 0) {
    1646           0 :                         if (group->end_ticks != 0) {
    1647           0 :                                 group->spin_ticks += (group->end_ticks - group->start_ticks);
    1648           0 :                                 group->end_ticks = 0;
    1649             :                         }
    1650           0 :                         group->start_ticks = 0;
    1651             :                 } else {
    1652           0 :                         group->end_ticks = spdk_get_ticks();
    1653             :                 }
    1654             :         }
    1655             : 
    1656        1025 :         if (spdk_unlikely(num_completions < 0)) {
    1657           0 :                 bdev_nvme_check_io_qpairs(group);
    1658             :         }
    1659             : 
    1660        1025 :         return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
    1661             : }
    1662             : 
    1663             : static int bdev_nvme_poll_adminq(void *arg);
    1664             : 
    1665             : static void
    1666         100 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
    1667             : {
    1668         100 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
    1669             : 
    1670         100 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
    1671             :                                           nvme_ctrlr, new_period_us);
    1672         100 : }
    1673             : 
                       /* Poller callback that processes admin queue completions of a controller.
                        *
                        * arg is the struct nvme_ctrlr the poller was registered with.
                        *
                        * Returns SPDK_POLLER_IDLE when spdk_nvme_ctrlr_process_admin_completions()
                        * returned 0, and SPDK_POLLER_BUSY otherwise (including when it returned an
                        * error).
                        */
    1674             : static int
    1675         146 : bdev_nvme_poll_adminq(void *arg)
    1676             : {
    1677             :         int32_t rc;
    1678         146 :         struct nvme_ctrlr *nvme_ctrlr = arg;
    1679             :         nvme_ctrlr_disconnected_cb disconnected_cb;
    1680             : 
    1681         146 :         assert(nvme_ctrlr != NULL);
    1682             : 
    1683         146 :         rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
    1684         146 :         if (rc < 0) {
                                       /* Take the pending disconnected callback, if any, and clear
                                        * the stored pointer before the callback is invoked.
                                        */
    1685          53 :                 disconnected_cb = nvme_ctrlr->disconnected_cb;
    1686          53 :                 nvme_ctrlr->disconnected_cb = NULL;
    1687             : 
    1688          53 :                 if (disconnected_cb != NULL) {
                                               /* A disconnect requested via nvme_ctrlr_disconnect() set
                                                * the poll period to 0. Restore the configured period
                                                * before running the callback.
                                                */
    1689          50 :                         bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
    1690             :                                                             g_opts.nvme_adminq_poll_period_us);
    1691          50 :                         disconnected_cb(nvme_ctrlr);
    1692             :                 } else {
                                               /* No disconnect was pending, so the error was not
                                                * requested by this module. Start a failover.
                                                */
    1693           3 :                         bdev_nvme_failover_ctrlr(nvme_ctrlr);
    1694             :                 }
    1695          93 :         } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
    1696             :                    SPDK_NVME_QPAIR_FAILURE_NONE) {
                                       /* Completions were processed without error but the admin qpair
                                        * reports a failure reason. Clear the I/O path caches.
                                        */
    1697           0 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    1698             :         }
    1699             : 
    1700         146 :         return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
    1701             : }
    1702             : 
                       /* Release an nvme_bdev and the resources it owns.
                        *
                        * io_device is the struct nvme_bdev. Its mutex is destroyed and its disk name,
                        * err_stat buffer and the structure itself are freed. bdev_nvme_destruct()
                        * passes this function to spdk_io_device_unregister() as the unregister
                        * callback.
                        */
    1703             : static void
    1704          37 : nvme_bdev_free(void *io_device)
    1705             : {
    1706          37 :         struct nvme_bdev *nvme_disk = io_device;
    1707             : 
    1708          37 :         pthread_mutex_destroy(&nvme_disk->mutex);
    1709          37 :         free(nvme_disk->disk.name);
    1710          37 :         free(nvme_disk->err_stat);
    1711          37 :         free(nvme_disk);
    1712          37 : }
    1713             : 
                       /* Tear down an nvme_bdev.
                        *
                        * ctx is the struct nvme_bdev. Every namespace on its nvme_ns_list is detached
                        * from the bdev, the bdev is removed from its nbdev_ctrlr's list, and the bdev
                        * is unregistered as an io_device with nvme_bdev_free() as the callback.
                        *
                        * Always returns 0.
                        */
    1714             : static int
    1715          36 : bdev_nvme_destruct(void *ctx)
    1716             : {
    1717          36 :         struct nvme_bdev *nvme_disk = ctx;
    1718             :         struct nvme_ns *nvme_ns, *tmp_nvme_ns;
    1719             : 
    1720             :         SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);
    1721             : 
                               /* The _SAFE variant is used because nvme_ns may be freed inside the loop. */
    1722          73 :         TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
    1723          37 :                 pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
    1724             : 
    1725          37 :                 nvme_ns->bdev = NULL;
    1726             : 
    1727          37 :                 assert(nvme_ns->id > 0);
    1728             : 
                                       /* If the controller no longer knows this namespace ID, release
                                        * the controller and free the nvme_ns here. The mutex is dropped
                                        * first in both branches.
                                        */
    1729          37 :                 if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
    1730           0 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1731             : 
    1732           0 :                         nvme_ctrlr_release(nvme_ns->ctrlr);
    1733           0 :                         nvme_ns_free(nvme_ns);
    1734             :                 } else {
    1735          37 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1736             :                 }
    1737             :         }
    1738             : 
                               /* The list of bdevs of an nbdev_ctrlr is modified under the global mutex. */
    1739          36 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    1740          36 :         TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
    1741          36 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    1742             : 
    1743          36 :         spdk_io_device_unregister(nvme_disk, nvme_bdev_free);
    1744             : 
    1745          36 :         return 0;
    1746             : }
    1747             : 
                       /* Allocate an I/O qpair for nvme_qpair and start connecting it.
                        *
                        * The qpair is allocated on nvme_qpair->ctrlr, added to the poll group of
                        * nvme_qpair->group and then connected. On success the new qpair is stored in
                        * nvme_qpair->qpair.
                        *
                        * Returns 0 on success, -1 if the qpair could not be allocated, or the non-zero
                        * value returned by spdk_nvme_poll_group_add() or
                        * spdk_nvme_ctrlr_connect_io_qpair(), in which case the allocated qpair is
                        * freed before returning.
                        */
    1748             : static int
    1749         100 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
    1750             : {
    1751             :         struct nvme_ctrlr *nvme_ctrlr;
    1752         100 :         struct spdk_nvme_io_qpair_opts opts;
    1753             :         struct spdk_nvme_qpair *qpair;
    1754             :         int rc;
    1755             : 
    1756         100 :         nvme_ctrlr = nvme_qpair->ctrlr;
    1757             : 
    1758         100 :         spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1759         100 :         opts.delay_cmd_submit = g_opts.delay_cmd_submit;
                               /* Only create the qpair at allocation time. It is connected explicitly
                                * below. NOTE(review): async_mode presumably lets the connect finish
                                * later; the reset path in this file polls
                                * spdk_nvme_qpair_is_connected() - confirm against the NVMe driver docs.
                                */
    1760         100 :         opts.create_only = true;
    1761         100 :         opts.async_mode = true;
                               /* Use the larger of the configured and the default request count, and
                                * write the chosen value back to the global options.
                                */
    1762         100 :         opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
    1763         100 :         g_opts.io_queue_requests = opts.io_queue_requests;
    1764             : 
    1765         100 :         qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1766         100 :         if (qpair == NULL) {
    1767           0 :                 return -1;
    1768             :         }
    1769             : 
    1770             :         SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
    1771             :                            spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
    1772             : 
    1773         100 :         assert(nvme_qpair->group != NULL);
    1774             : 
    1775         100 :         rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
    1776         100 :         if (rc != 0) {
    1777           0 :                 SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n");
    1778           0 :                 goto err;
    1779             :         }
    1780             : 
    1781         100 :         rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
    1782         100 :         if (rc != 0) {
    1783           0 :                 SPDK_ERRLOG("Unable to connect I/O qpair.\n");
    1784           0 :                 goto err;
    1785             :         }
    1786             : 
    1787         100 :         nvme_qpair->qpair = qpair;
    1788             : 
                               /* Unless auto failback is disabled, clear the I/O path cache of this
                                * nvme_qpair now that it has a qpair again.
                                */
    1789         100 :         if (!g_opts.disable_auto_failback) {
    1790          71 :                 _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1791             :         }
    1792             : 
    1793         100 :         return 0;
    1794             : 
                       /* Common failure exit: free the allocated qpair and return the error code. */
    1795           0 : err:
    1796           0 :         spdk_nvme_ctrlr_free_io_qpair(qpair);
    1797             : 
    1798           0 :         return rc;
    1799             : }
    1800             : 
    1801             : static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);
    1802             : 
                       /* Per-channel step of spdk_for_each_channel() that drains pending resets.
                        *
                        * Every nvme_bdev_io queued on the channel's pending_resets list is removed and
                        * handed to bdev_nvme_reset_io_continue(). The status passed on is 0 when the
                        * iteration context is NULL and -1 otherwise; bdev_nvme_reset_ctrlr_complete()
                        * passes NULL for a successful reset and a non-NULL marker for a failed one.
                        */
    1803             : static void
    1804          82 : bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
    1805             : {
    1806          82 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    1807          82 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
    1808          82 :         int rc = 0;
    1809             :         struct nvme_bdev_io *bio;
    1810             : 
    1811          82 :         if (spdk_io_channel_iter_get_ctx(i) != NULL) {
    1812          35 :                 rc = -1;
    1813             :         }
    1814             : 
    1815          86 :         while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
    1816           4 :                 bio = TAILQ_FIRST(&ctrlr_ch->pending_resets);
    1817           4 :                 TAILQ_REMOVE(&ctrlr_ch->pending_resets, bio, retry_link);
    1818             : 
    1819           4 :                 bdev_nvme_reset_io_continue(bio, rc);
    1820             :         }
    1821             : 
                               /* Move on to the next channel. */
    1822          82 :         spdk_for_each_channel_continue(i, 0);
    1823          82 : }
    1824             : 
    1825             : /* This function marks the current trid as failed by storing the current ticks
    1826             :  * and then sets the next trid to the active trid within a controller if exists.
    1827             :  *
    1828             :  * The purpose of the boolean return value is to request the caller to disconnect
    1829             :  * the current trid now to try connecting the next trid.
                        *
                        * remove: when the active trid is switched, free the old path_id instead of
                        *         moving it to the tail of the trid list.
                        * start:  when false and reconnect_delay_sec is 0, the active trid is not
                        *         switched and false is returned. When true, a switch always returns
                        *         true without checking when the next trid last failed.
                        *
                        * The visible caller, bdev_nvme_reset_ctrlr_complete(), holds nvme_ctrlr->mutex
                        * while calling this function.
    1830             :  */
    1831             : static bool
    1832          36 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
    1833             : {
    1834             :         struct nvme_path_id *path_id, *next_path;
    1835             :         int rc __attribute__((unused));
    1836             : 
    1837          36 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    1838          36 :         assert(path_id);
    1839          36 :         assert(path_id == nvme_ctrlr->active_path_id);
    1840          36 :         next_path = TAILQ_NEXT(path_id, link);
    1841             : 
    1842             :         /* Update the last failed time. It means the trid is failed if its last
    1843             :          * failed time is non-zero.
    1844             :          */
    1845          36 :         path_id->last_failed_tsc = spdk_get_ticks();
    1846             : 
    1847          36 :         if (next_path == NULL) {
    1848             :                 /* There is no alternate trid within a controller. */
    1849          25 :                 return false;
    1850             :         }
    1851             : 
    1852          11 :         if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    1853             :                 /* Connect is not retried in a controller reset sequence. Connecting
    1854             :                  * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
    1855             :                  */
    1856           3 :                 return false;
    1857             :         }
    1858             : 
    1859           8 :         assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
    1860             : 
    1861           8 :         SPDK_NOTICELOG("Start failover from %s:%s to %s:%s\n", path_id->trid.traddr,
    1862             :                        path_id->trid.trsvcid,        next_path->trid.traddr, next_path->trid.trsvcid);
    1863             : 
                               /* Fail the controller, then make the next trid the active one both in
                                * this module's bookkeeping and in the NVMe driver.
                                */
    1864           8 :         spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    1865           8 :         nvme_ctrlr->active_path_id = next_path;
    1866           8 :         rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
    1867           8 :         assert(rc == 0);
    1868           8 :         TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
    1869           8 :         if (!remove) {
    1870             :                 /** Shuffle the old trid to the end of the list and use the new one.
    1871             :                  * Allows for round robin through multiple connections.
    1872             :                  */
    1873           6 :                 TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
    1874             :         } else {
    1875           2 :                 free(path_id);
    1876             :         }
    1877             : 
    1878           8 :         if (start || next_path->last_failed_tsc == 0) {
    1879             :                 /* bdev_nvme_failover_ctrlr() is just called or the next trid is not failed
    1880             :                  * or used yet. Try the next trid now.
    1881             :                  */
    1882           7 :                 return true;
    1883             :         }
    1884             : 
    1885           1 :         if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
    1886           1 :             nvme_ctrlr->opts.reconnect_delay_sec) {
    1887             :                 /* Enough backoff passed since the next trid failed. Try the next trid now. */
    1888           0 :                 return true;
    1889             :         }
    1890             : 
    1891             :         /* The next trid will be tried after reconnect_delay_sec seconds. */
    1892           1 :         return false;
    1893             : }
    1894             : 
                       /* Check whether the controller loss timeout has expired.
                        *
                        * Returns false when ctrlr_loss_timeout_sec is 0 or -1; for those two values
                        * the elapsed time is never checked. Otherwise returns true when the whole
                        * seconds elapsed since nvme_ctrlr->reset_start_tsc are greater than or equal
                        * to ctrlr_loss_timeout_sec.
                        */
    1895             : static bool
    1896          68 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
    1897             : {
    1898             :         int32_t elapsed;
    1899             : 
    1900          68 :         if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
    1901          36 :             nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
    1902          42 :                 return false;
    1903             :         }
    1904             : 
                               /* Convert the tick difference to whole seconds. */
    1905          26 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    1906          26 :         if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
    1907           6 :                 return true;
    1908             :         } else {
    1909          20 :                 return false;
    1910             :         }
    1911             : }
    1912             : 
                       /* Check whether the fast I/O fail timeout has expired.
                        *
                        * Returns false when fast_io_fail_timeout_sec is 0. Otherwise returns true when
                        * the whole seconds elapsed since nvme_ctrlr->reset_start_tsc are greater than
                        * or equal to fast_io_fail_timeout_sec.
                        */
    1913             : static bool
    1914          12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
    1915             : {
    1916             :         uint32_t elapsed;
    1917             : 
    1918          12 :         if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
    1919           8 :                 return false;
    1920             :         }
    1921             : 
                               /* Convert the tick difference to whole seconds. */
    1922           4 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    1923           4 :         if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
    1924           2 :                 return true;
    1925             :         } else {
    1926           2 :                 return false;
    1927             :         }
    1928             : }
    1929             : 
    1930             : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
    1931             : 
                       /* Start disconnecting a controller and register what to do once it is done.
                        *
                        * If spdk_nvme_ctrlr_disconnect() fails, the reset sequence is completed as
                        * failed and cb_fn is never called. Otherwise cb_fn is stored in
                        * nvme_ctrlr->disconnected_cb, from where bdev_nvme_poll_adminq() picks it up,
                        * and the admin queue poll period is set to 0.
                        */
    1932             : static void
    1933          51 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
    1934             : {
    1935             :         int rc;
    1936             : 
    1937          51 :         rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
    1938          51 :         if (rc != 0) {
    1939             :                 /* Disconnect fails if ctrlr is already resetting or removed. In this case,
    1940             :                  * fail the reset sequence immediately.
    1941             :                  */
    1942           1 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    1943           1 :                 return;
    1944             :         }
    1945             : 
    1946             :         /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
    1947             :          * Set callback here to execute the specified operation after ctrlr is really disconnected.
    1948             :          */
    1949          50 :         assert(nvme_ctrlr->disconnected_cb == NULL);
    1950          50 :         nvme_ctrlr->disconnected_cb = cb_fn;
    1951             : 
    1952             :         /* During disconnection, reduce the period to poll adminq more often. */
    1953          50 :         bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
    1954             : }
    1955             : 
                       /* Follow-up action chosen by bdev_nvme_check_op_after_reset() and executed by
                        * _bdev_nvme_reset_ctrlr_complete() once a reset sequence has finished:
                        *
                        * OP_NONE                      - nothing more to do.
                        * OP_COMPLETE_PENDING_DESTRUCT - unregister the controller.
                        * OP_DESTRUCT                  - delete the controller and remove its discovery
                        *                                entry.
                        * OP_DELAYED_RECONNECT         - disconnect, then start the reconnect delay
                        *                                timer.
                        * OP_FAILOVER                  - start a failover.
                        */
    1956             : enum bdev_nvme_op_after_reset {
    1957             :         OP_NONE,
    1958             :         OP_COMPLETE_PENDING_DESTRUCT,
    1959             :         OP_DESTRUCT,
    1960             :         OP_DELAYED_RECONNECT,
    1961             :         OP_FAILOVER,
    1962             : };
    1963             : 
    1964             : typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
    1965             : 
                       /* Decide what to do after a reset sequence finished.
                        *
                        * The checks are made in priority order: a controller that can be unregistered,
                        * a pending failover, a successful reset (or reconnect_delay_sec == 0), an
                        * expired controller loss timeout, and finally a delayed reconnect.
                        *
                        * Side effects: pending_failover is cleared when it was set, reset_start_tsc
                        * is zeroed for OP_FAILOVER and OP_NONE, and fast_io_fail_timedout is set when
                        * the fast I/O fail timeout has expired on the delayed reconnect path.
                        *
                        * The visible caller, _bdev_nvme_reset_ctrlr_complete(), holds
                        * nvme_ctrlr->mutex while calling this function.
                        */
    1966             : static _bdev_nvme_op_after_reset
    1967          50 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
    1968             : {
    1969          50 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    1970             :                 /* Complete pending destruct after reset completes. */
    1971           0 :                 return OP_COMPLETE_PENDING_DESTRUCT;
    1972          50 :         } else if (nvme_ctrlr->pending_failover) {
    1973           3 :                 nvme_ctrlr->pending_failover = false;
    1974           3 :                 nvme_ctrlr->reset_start_tsc = 0;
    1975           3 :                 return OP_FAILOVER;
    1976          47 :         } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    1977          33 :                 nvme_ctrlr->reset_start_tsc = 0;
    1978          33 :                 return OP_NONE;
    1979          14 :         } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    1980           2 :                 return OP_DESTRUCT;
    1981             :         } else {
    1982          12 :                 if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
    1983           2 :                         nvme_ctrlr->fast_io_fail_timedout = true;
    1984             :                 }
    1985          12 :                 return OP_DELAYED_RECONNECT;
    1986             :         }
    1987             : }
    1988             : 
    1989             : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
    1990             : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
    1991             : 
                       /* Poller callback run when the reconnect delay timer fires.
                        *
                        * ctx is the struct nvme_ctrlr. The timer poller unregisters itself on every
                        * invocation. If a delayed reconnect is still pending and the controller is
                        * not being destructed, the controller is marked as resetting, the admin queue
                        * poller is resumed and the reconnect is started.
                        *
                        * Always returns SPDK_POLLER_BUSY.
                        */
    1992             : static int
    1993           9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
    1994             : {
    1995           9 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    1996             : 
    1997             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
    1998           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1999             : 
    2000           9 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2001             : 
                               /* Nothing to do if the delayed reconnect is no longer pending. */
    2002           9 :         if (!nvme_ctrlr->reconnect_is_delayed) {
    2003           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2004           0 :                 return SPDK_POLLER_BUSY;
    2005             :         }
    2006             : 
    2007           9 :         nvme_ctrlr->reconnect_is_delayed = false;
    2008             : 
                               /* Do not reconnect a controller that is being destructed. */
    2009           9 :         if (nvme_ctrlr->destruct) {
    2010           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2011           0 :                 return SPDK_POLLER_BUSY;
    2012             :         }
    2013             : 
    2014           9 :         assert(nvme_ctrlr->resetting == false);
    2015           9 :         nvme_ctrlr->resetting = true;
    2016             : 
    2017           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2018             : 
                               /* The admin queue poller was paused by
                                * bdev_nvme_start_reconnect_delay_timer().
                                */
    2019           9 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2020             : 
    2021           9 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2022           9 :         return SPDK_POLLER_BUSY;
    2023             : }
    2024             : 
                       /* Pause admin queue polling and arm the reconnect delay timer.
                        *
                        * The admin queue poller is paused, reconnect_is_delayed is set, and a poller
                        * running bdev_nvme_reconnect_delay_timer_expired() is registered with a period
                        * of reconnect_delay_sec converted to microseconds.
                        * _bdev_nvme_reset_ctrlr_complete() passes this function to
                        * nvme_ctrlr_disconnect() as the disconnected callback.
                        */
    2025             : static void
    2026          12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
    2027             : {
    2028          12 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2029             : 
    2030          12 :         assert(nvme_ctrlr->reconnect_is_delayed == false);
    2031          12 :         nvme_ctrlr->reconnect_is_delayed = true;
    2032             : 
    2033          12 :         assert(nvme_ctrlr->reconnect_delay_timer == NULL);
    2034          12 :         nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
    2035             :                                             nvme_ctrlr,
    2036             :                                             nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
    2037          12 : }
    2038             : 
    2039             : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
    2040             : 
                       /* Completion callback of the spdk_for_each_channel() iteration started by
                        * bdev_nvme_reset_ctrlr_complete().
                        *
                        * The reset counts as successful when the iteration context is NULL; the
                        * status argument is not used. The controller's reset state flags are cleared
                        * under its mutex, the user's ctrlr_op callback is invoked unless a failover
                        * follows, and the follow-up action chosen by bdev_nvme_check_op_after_reset()
                        * is executed.
                        */
    2041             : static void
    2042          48 : _bdev_nvme_reset_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
    2043             : {
    2044          48 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2045          48 :         bool success = spdk_io_channel_iter_get_ctx(i) == NULL;
    2046          48 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2047          48 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2048             :         enum bdev_nvme_op_after_reset op_after_reset;
    2049             : 
    2050          48 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2051             : 
                               /* The callback and its argument were saved in locals above. Clear the
                                * stored copies; they are restored below only for OP_FAILOVER.
                                */
    2052          48 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2053          48 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2054             : 
    2055          48 :         if (!success) {
    2056          21 :                 SPDK_ERRLOG("Resetting controller failed.\n");
    2057             :         } else {
    2058          27 :                 SPDK_NOTICELOG("Resetting controller successful.\n");
    2059             :         }
    2060             : 
    2061          48 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2062          48 :         nvme_ctrlr->resetting = false;
    2063          48 :         nvme_ctrlr->dont_retry = false;
    2064          48 :         nvme_ctrlr->in_failover = false;
    2065             : 
    2066          48 :         op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
    2067          48 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2068             : 
    2069             :         /* Delay callbacks when the next operation is a failover. */
    2070          48 :         if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
    2071          10 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
    2072             :         }
    2073             : 
    2074          48 :         switch (op_after_reset) {
    2075           0 :         case OP_COMPLETE_PENDING_DESTRUCT:
    2076           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2077           0 :                 break;
    2078           2 :         case OP_DESTRUCT:
    2079           2 :                 bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
    2080           2 :                 remove_discovery_entry(nvme_ctrlr);
    2081           2 :                 break;
    2082          12 :         case OP_DELAYED_RECONNECT:
    2083          12 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
    2084          12 :                 break;
    2085           3 :         case OP_FAILOVER:
                                       /* Hand the not yet invoked callback back to the controller
                                        * before starting the failover.
                                        */
    2086           3 :                 nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
    2087           3 :                 nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
    2088           3 :                 bdev_nvme_failover_ctrlr(nvme_ctrlr);
    2089           3 :                 break;
    2090          31 :         default:
    2091          31 :                 break;
    2092             :         }
    2093          48 : }
    2094             : 
                       /* Finish a controller reset sequence with the given result.
                        *
                        * On failure the active trid is marked as failed and, if an alternate trid is
                        * ready to be tried, the controller is disconnected and reconnected right away
                        * instead of completing the reset. In every other case the pending resets of
                        * all channels are completed and _bdev_nvme_reset_ctrlr_complete() runs
                        * afterwards.
                        */
    2095             : static void
    2096          50 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
    2097             : {
    2098          50 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2099          50 :         if (!success) {
    2100             :                 /* Connecting the active trid failed. Set the next alternate trid to the
    2101             :                  * active trid if it exists.
    2102             :                  */
    2103          23 :                 if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
    2104             :                         /* The next alternate trid exists and is ready to try. Try it now. */
    2105           2 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2106             : 
    2107           2 :                         nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2108           2 :                         return;
    2109             :                 }
    2110             : 
    2111             :                 /* We came here if there is no alternate trid or if the next trid exists but
    2112             :                  * is not ready to try. We will try the active trid after reconnect_delay_sec
    2113             :                  * seconds if it is non-zero or at the next reset call otherwise.
    2114             :                  */
    2115             :         } else {
    2116             :                 /* Connecting the active trid succeeded. Clear the last failed time because it
    2117             :                  * means the trid is failed if its last failed time is non-zero.
    2118             :                  */
    2119          27 :                 nvme_ctrlr->active_path_id->last_failed_tsc = 0;
    2120             :         }
    2121          48 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2122             : 
                               /* The iteration context encodes the result: NULL means success and any
                                * non-NULL value means failure. The pointer is only compared with NULL.
                                */
    2123             :         /* Make sure we clear any pending resets before returning. */
    2124          48 :         spdk_for_each_channel(nvme_ctrlr,
    2125             :                               bdev_nvme_complete_pending_resets,
    2126             :                               success ? NULL : (void *)0x1,
    2127             :                               _bdev_nvme_reset_ctrlr_complete);
    2128             : }
    2129             : 
                       /* Completion callback of the qpair cleanup iteration started by
                        * bdev_nvme_reset_create_qpairs_done() after recreating the I/O qpairs failed.
                        * Completes the reset sequence as failed; the status argument is not used.
                        */
    2130             : static void
    2131           0 : bdev_nvme_reset_create_qpairs_failed(struct spdk_io_channel_iter *i, int status)
    2132             : {
    2133           0 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2134             : 
    2135           0 :         bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2136           0 : }
    2137             : 
                       /* Per-channel step of spdk_for_each_channel() that disconnects the I/O qpair
                        * of a controller channel.
                        *
                        * The I/O path cache of the channel's nvme_qpair is cleared first. If the
                        * channel has a qpair, its disconnect is started and the iterator is parked in
                        * ctrlr_ch->reset_iter instead of being continued here. If there is no qpair,
                        * the iteration continues immediately.
                        */
    2138             : static void
    2139          62 : bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
    2140             : {
    2141          62 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
    2142          62 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
    2143             :         struct nvme_qpair *nvme_qpair;
    2144             : 
    2145          62 :         nvme_qpair = ctrlr_ch->qpair;
    2146          62 :         assert(nvme_qpair != NULL);
    2147             : 
    2148          62 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    2149             : 
    2150          62 :         if (nvme_qpair->qpair != NULL) {
                                       /* When the controller is flagged dont_retry, set the abort DNR
                                        * flag on the qpair before disconnecting it.
                                        */
    2151          52 :                 if (nvme_qpair->ctrlr->dont_retry) {
    2152          39 :                         spdk_nvme_qpair_set_abort_dnr(nvme_qpair->qpair, true);
    2153             :                 }
    2154          52 :                 spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
    2155             : 
    2156             :                 /* The current full reset sequence will move to the next
    2157             :                  * ctrlr_channel after the qpair is actually disconnected.
    2158             :                  */
    2159          52 :                 assert(ctrlr_ch->reset_iter == NULL);
    2160          52 :                 ctrlr_ch->reset_iter = i;
    2161             :         } else {
    2162          10 :                 spdk_for_each_channel_continue(i, 0);
    2163             :         }
    2164          62 : }
    2165             : 
                       /* Completion callback run with the final status of an iteration over the
                        * controller's channels.
                        * NOTE(review): presumably the iteration that runs
                        * bdev_nvme_reset_create_qpair() on each channel; the call that starts it is
                        * not in this part of the file - confirm.
                        *
                        * A status of 0 completes the reset sequence as successful. Any other status
                        * starts an iteration that disconnects the qpairs again, after which
                        * bdev_nvme_reset_create_qpairs_failed() completes the reset as failed.
                        */
    2166             : static void
    2167          27 : bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
    2168             : {
    2169          27 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2170             : 
    2171          27 :         if (status == 0) {
    2172          27 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
    2173             :         } else {
    2174             :                 /* Delete the added qpairs and quiesce ctrlr to make the states clean. */
    2175           0 :                 spdk_for_each_channel(nvme_ctrlr,
    2176             :                                       bdev_nvme_reset_destroy_qpair,
    2177             :                                       NULL,
    2178             :                                       bdev_nvme_reset_create_qpairs_failed);
    2179             :         }
    2180          27 : }
    2181             : 
                       /* Poller callback that waits for a recreated I/O qpair to become connected.
                        *
                        * ctx is the struct nvme_ctrlr_channel; the poller is registered by
                        * bdev_nvme_reset_create_qpair(). Once the qpair reports connected, the poller
                        * unregisters itself and the parked channel iteration is continued.
                        *
                        * Always returns SPDK_POLLER_BUSY.
                        */
    2182             : static int
    2183          43 : bdev_nvme_reset_check_qpair_connected(void *ctx)
    2184             : {
    2185          43 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx;
    2186             : 
    2187          43 :         if (ctrlr_ch->reset_iter == NULL) {
    2188             :                 /* qpair was already failed to connect and the reset sequence is being aborted. */
    2189           0 :                 assert(ctrlr_ch->connect_poller == NULL);
    2190           0 :                 assert(ctrlr_ch->qpair->qpair == NULL);
    2191           0 :                 return SPDK_POLLER_BUSY;
    2192             :         }
    2193             : 
    2194          43 :         assert(ctrlr_ch->qpair->qpair != NULL);
    2195             : 
                               /* Not connected yet. Keep the poller registered and check again on its
                                * next run.
                                */
    2196          43 :         if (!spdk_nvme_qpair_is_connected(ctrlr_ch->qpair->qpair)) {
    2197           0 :                 return SPDK_POLLER_BUSY;
    2198             :         }
    2199             : 
    2200          43 :         spdk_poller_unregister(&ctrlr_ch->connect_poller);
    2201             : 
    2202             :         /* qpair was completed to connect. Move to the next ctrlr_channel */
    2203          43 :         spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    2204          43 :         ctrlr_ch->reset_iter = NULL;
    2205             : 
    2206          43 :         if (!g_opts.disable_auto_failback) {
    2207          30 :                 _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
    2208             :         }
    2209             : 
    2210          43 :         return SPDK_POLLER_BUSY;
    2211             : }
    2212             : 
                       /* Per-channel iteration step that recreates the I/O qpair of a controller
                        * channel.
                        *
                        * If bdev_nvme_create_qpair() succeeds, a poller running
                        * bdev_nvme_reset_check_qpair_connected() is registered with a period of 0 and
                        * the iterator is parked in ctrlr_ch->reset_iter. If it fails, the iteration
                        * is continued immediately with the error code as its status.
                        */
    2213             : static void
    2214          43 : bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
    2215             : {
    2216          43 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    2217          43 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
    2218             :         int rc;
    2219             : 
    2220          43 :         rc = bdev_nvme_create_qpair(ctrlr_ch->qpair);
    2221          43 :         if (rc == 0) {
    2222          43 :                 ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
    2223             :                                            ctrlr_ch, 0);
    2224             : 
    2225             :                 /* The current full reset sequence will move to the next
    2226             :                  * ctrlr_channel after the qpair is actually connected.
    2227             :                  */
    2228          43 :                 assert(ctrlr_ch->reset_iter == NULL);
    2229          43 :                 ctrlr_ch->reset_iter = i;
    2230             :         } else {
    2231           0 :                 spdk_for_each_channel_continue(i, rc);
    2232             :         }
    2233          43 : }
    2234             : 
                       /* Drop the driver namespace handle of every tracked namespace that is no
                        * longer active on the controller.
                        *
                        * The namespaces tracked by nvme_ctrlr are walked with
                        * nvme_ctrlr_get_first_active_ns() / nvme_ctrlr_get_next_active_ns(). For each
                        * one that spdk_nvme_ctrlr_is_active_ns() reports as inactive, nvme_ns->ns is
                        * set to NULL; the nvme_ns itself is kept.
                        */
    2235             : static void
    2236          27 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    2237             : {
    2238          27 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    2239             :         struct nvme_ns *nvme_ns;
    2240             : 
    2241          27 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    2242          39 :              nvme_ns != NULL;
    2243          12 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    2244          12 :                 if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    2245           1 :                         SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
    2246             :                         /* NS can be added again. Just nullify nvme_ns->ns. */
    2247           1 :                         nvme_ns->ns = NULL;
    2248             :                 }
    2249             :         }
    2250          27 : }
    2251             : 
    2252             : 
    2253             : static int
    2254          49 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
    2255             : {
    2256          49 :         struct nvme_ctrlr *nvme_ctrlr = arg;
    2257          49 :         int rc = -ETIMEDOUT;
    2258             : 
    2259          49 :         if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    2260             :                 /* Mark the ctrlr as failed. The next call to
    2261             :                  * spdk_nvme_ctrlr_reconnect_poll_async() will then
    2262             :                  * do the necessary cleanup and return failure.
    2263             :                  */
    2264           2 :                 spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    2265             :         }
    2266             : 
    2267          49 :         rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
    2268          49 :         if (rc == -EAGAIN) {
    2269           0 :                 return SPDK_POLLER_BUSY;
    2270             :         }
    2271             : 
    2272          49 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
    2273          49 :         if (rc == 0) {
    2274          27 :                 nvme_ctrlr_check_namespaces(nvme_ctrlr);
    2275             : 
    2276             :                 /* Recreate all of the I/O queue pairs */
    2277          27 :                 spdk_for_each_channel(nvme_ctrlr,
    2278             :                                       bdev_nvme_reset_create_qpair,
    2279             :                                       NULL,
    2280             :                                       bdev_nvme_reset_create_qpairs_done);
    2281             :         } else {
    2282          22 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2283             :         }
    2284          49 :         return SPDK_POLLER_BUSY;
    2285             : }
    2286             : 
    2287             : static void
    2288          49 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2289             : {
    2290          49 :         spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
    2291             : 
    2292             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
    2293          49 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
    2294          49 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
    2295             :                                           nvme_ctrlr, 0);
    2296          49 : }
    2297             : 
    2298             : static void
    2299          36 : bdev_nvme_reset_destroy_qpair_done(struct spdk_io_channel_iter *i, int status)
    2300             : {
    2301          36 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2302             : 
    2303             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
    2304          36 :         assert(status == 0);
    2305             : 
    2306          36 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2307           0 :                 bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2308             :         } else {
    2309          36 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2310             :         }
    2311          36 : }
    2312             : 
    2313             : static void
    2314          36 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2315             : {
    2316          36 :         spdk_for_each_channel(nvme_ctrlr,
    2317             :                               bdev_nvme_reset_destroy_qpair,
    2318             :                               NULL,
    2319             :                               bdev_nvme_reset_destroy_qpair_done);
    2320          36 : }
    2321             : 
    2322             : static void
    2323           3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
    2324             : {
    2325           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2326             : 
    2327           3 :         assert(nvme_ctrlr->resetting == true);
    2328           3 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2329             : 
    2330           3 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2331             : 
    2332           3 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2333             : 
    2334           3 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2335           3 : }
    2336             : 
    2337             : static void
    2338          36 : _bdev_nvme_reset_ctrlr(void *ctx)
    2339             : {
    2340          36 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2341             : 
    2342          36 :         assert(nvme_ctrlr->resetting == true);
    2343          36 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2344             : 
    2345          36 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2346           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
    2347             :         } else {
    2348          36 :                 bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
    2349             :         }
    2350          36 : }
    2351             : 
    2352             : static int
    2353          34 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2354             : {
    2355             :         spdk_msg_fn msg_fn;
    2356             : 
    2357          34 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2358          34 :         if (nvme_ctrlr->destruct) {
    2359           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2360           3 :                 return -ENXIO;
    2361             :         }
    2362             : 
    2363          31 :         if (nvme_ctrlr->resetting) {
    2364           6 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2365           6 :                 SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
    2366           6 :                 return -EBUSY;
    2367             :         }
    2368             : 
    2369          25 :         if (nvme_ctrlr->disabled) {
    2370           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2371           0 :                 SPDK_NOTICELOG("Unable to perform reset. Controller is disabled.\n");
    2372           0 :                 return -EALREADY;
    2373             :         }
    2374             : 
    2375          25 :         nvme_ctrlr->resetting = true;
    2376          25 :         nvme_ctrlr->dont_retry = true;
    2377             : 
    2378          25 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2379           1 :                 SPDK_DEBUGLOG(bdev_nvme, "Reconnect is already scheduled.\n");
    2380           1 :                 msg_fn = bdev_nvme_reconnect_ctrlr_now;
    2381           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2382             :         } else {
    2383          24 :                 msg_fn = _bdev_nvme_reset_ctrlr;
    2384          24 :                 assert(nvme_ctrlr->reset_start_tsc == 0);
    2385             :         }
    2386             : 
    2387          25 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2388             : 
    2389          25 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2390             : 
    2391          25 :         spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2392          25 :         return 0;
    2393             : }
    2394             : 
    2395             : static int
    2396           3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2397             : {
    2398           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2399           3 :         if (nvme_ctrlr->destruct) {
    2400           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2401           0 :                 return -ENXIO;
    2402             :         }
    2403             : 
    2404           3 :         if (nvme_ctrlr->resetting) {
    2405           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2406           0 :                 return -EBUSY;
    2407             :         }
    2408             : 
    2409           3 :         if (!nvme_ctrlr->disabled) {
    2410           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2411           1 :                 return -EALREADY;
    2412             :         }
    2413             : 
    2414           2 :         nvme_ctrlr->disabled = false;
    2415           2 :         nvme_ctrlr->resetting = true;
    2416             : 
    2417           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2418             : 
    2419           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2420             : 
    2421           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
    2422           2 :         return 0;
    2423             : }
    2424             : 
    2425             : static void
    2426           2 : _bdev_nvme_disable_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
    2427             : {
    2428           2 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2429           2 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2430           2 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2431             :         enum bdev_nvme_op_after_reset op_after_disable;
    2432             : 
    2433           2 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2434             : 
    2435           2 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2436           2 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2437             : 
    2438           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2439             : 
    2440           2 :         nvme_ctrlr->resetting = false;
    2441           2 :         nvme_ctrlr->dont_retry = false;
    2442             : 
    2443           2 :         op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
    2444             : 
    2445           2 :         nvme_ctrlr->disabled = true;
    2446           2 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2447             : 
    2448           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2449             : 
    2450           2 :         if (ctrlr_op_cb_fn) {
    2451           0 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
    2452             :         }
    2453             : 
    2454           2 :         switch (op_after_disable) {
    2455           0 :         case OP_COMPLETE_PENDING_DESTRUCT:
    2456           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2457           0 :                 break;
    2458           2 :         default:
    2459           2 :                 break;
    2460             :         }
    2461             : 
    2462           2 : }
    2463             : 
    2464             : static void
    2465           2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
    2466             : {
    2467             :         /* Make sure we clear any pending resets before returning. */
    2468           2 :         spdk_for_each_channel(nvme_ctrlr,
    2469             :                               bdev_nvme_complete_pending_resets,
    2470             :                               NULL,
    2471             :                               _bdev_nvme_disable_ctrlr_complete);
    2472           2 : }
    2473             : 
    2474             : static void
    2475           1 : bdev_nvme_disable_destroy_qpairs_done(struct spdk_io_channel_iter *i, int status)
    2476             : {
    2477           1 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2478             : 
    2479           1 :         assert(status == 0);
    2480             : 
    2481           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2482           0 :                 bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2483             :         } else {
    2484           1 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
    2485             :         }
    2486           1 : }
    2487             : 
    2488             : static void
    2489           1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2490             : {
    2491           1 :         spdk_for_each_channel(nvme_ctrlr,
    2492             :                               bdev_nvme_reset_destroy_qpair,
    2493             :                               NULL,
    2494             :                               bdev_nvme_disable_destroy_qpairs_done);
    2495           1 : }
    2496             : 
    2497             : static void
    2498           1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
    2499             : {
    2500           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2501             : 
    2502           1 :         assert(nvme_ctrlr->resetting == true);
    2503           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2504             : 
    2505           1 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2506             : 
    2507           1 :         bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2508           1 : }
    2509             : 
    2510             : static void
    2511           1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
    2512             : {
    2513           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2514             : 
    2515           1 :         assert(nvme_ctrlr->resetting == true);
    2516           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2517             : 
    2518           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2519           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
    2520             :         } else {
    2521           1 :                 bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
    2522             :         }
    2523           1 : }
    2524             : 
    2525             : static int
    2526           5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2527             : {
    2528             :         spdk_msg_fn msg_fn;
    2529             : 
    2530           5 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2531           5 :         if (nvme_ctrlr->destruct) {
    2532           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2533           1 :                 return -ENXIO;
    2534             :         }
    2535             : 
    2536           4 :         if (nvme_ctrlr->resetting) {
    2537           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2538           1 :                 return -EBUSY;
    2539             :         }
    2540             : 
    2541           3 :         if (nvme_ctrlr->disabled) {
    2542           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2543           1 :                 return -EALREADY;
    2544             :         }
    2545             : 
    2546           2 :         nvme_ctrlr->resetting = true;
    2547           2 :         nvme_ctrlr->dont_retry = true;
    2548             : 
    2549           2 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2550           1 :                 msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
    2551           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2552             :         } else {
    2553           1 :                 msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
    2554             :         }
    2555             : 
    2556           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2557             : 
    2558           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2559             : 
    2560           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2561           2 :         return 0;
    2562             : }
    2563             : 
    2564             : static int
    2565          16 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2566             :               bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2567             : {
    2568             :         int rc;
    2569             : 
    2570          16 :         switch (op) {
    2571          15 :         case NVME_CTRLR_OP_RESET:
    2572          15 :                 rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
    2573          15 :                 break;
    2574           0 :         case NVME_CTRLR_OP_ENABLE:
    2575           0 :                 rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
    2576           0 :                 break;
    2577           0 :         case NVME_CTRLR_OP_DISABLE:
    2578           0 :                 rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
    2579           0 :                 break;
    2580           1 :         default:
    2581           1 :                 rc = -EINVAL;
    2582           1 :                 break;
    2583             :         }
    2584             : 
    2585          16 :         if (rc == 0) {
    2586           9 :                 assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
    2587           9 :                 assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
    2588           9 :                 nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
    2589           9 :                 nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
    2590             :         }
    2591          16 :         return rc;
    2592             : }
    2593             : 
    2594             : struct nvme_ctrlr_op_rpc_ctx {
    2595             :         struct nvme_ctrlr *nvme_ctrlr;
    2596             :         struct spdk_thread *orig_thread;
    2597             :         enum nvme_ctrlr_op op;
    2598             :         int rc;
    2599             :         bdev_nvme_ctrlr_op_cb cb_fn;
    2600             :         void *cb_arg;
    2601             : };
    2602             : 
    2603             : static void
    2604           4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
    2605             : {
    2606           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2607             : 
    2608           4 :         assert(ctx != NULL);
    2609           4 :         assert(ctx->cb_fn != NULL);
    2610             : 
    2611           4 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2612             : 
    2613           4 :         free(ctx);
    2614           4 : }
    2615             : 
    2616             : static void
    2617           4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
    2618             : {
    2619           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2620             : 
    2621           4 :         ctx->rc = rc;
    2622             : 
    2623           4 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
    2624           4 : }
    2625             : 
    2626             : void
    2627           4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2628             :                   bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2629             : {
    2630             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2631             :         int rc;
    2632             : 
    2633           4 :         assert(cb_fn != NULL);
    2634             : 
    2635           4 :         ctx = calloc(1, sizeof(*ctx));
    2636           4 :         if (ctx == NULL) {
    2637           0 :                 SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2638           0 :                 cb_fn(cb_arg, -ENOMEM);
    2639           0 :                 return;
    2640             :         }
    2641             : 
    2642           4 :         ctx->orig_thread = spdk_get_thread();
    2643           4 :         ctx->cb_fn = cb_fn;
    2644           4 :         ctx->cb_arg = cb_arg;
    2645             : 
    2646           4 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
    2647           4 :         if (rc == 0) {
    2648           1 :                 return;
    2649           3 :         } else if (rc == -EALREADY) {
    2650           0 :                 rc = 0;
    2651             :         }
    2652             : 
    2653           3 :         nvme_ctrlr_op_rpc_complete(ctx, rc);
    2654             : }
    2655             : 
    2656             : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
    2657             : 
    2658             : static void
    2659           2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
    2660             : {
    2661           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2662             :         struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
    2663             :         int rc;
    2664             : 
    2665           2 :         prev_nvme_ctrlr = ctx->nvme_ctrlr;
    2666           2 :         ctx->nvme_ctrlr = NULL;
    2667             : 
    2668           2 :         if (ctx->rc != 0) {
    2669           0 :                 goto complete;
    2670             :         }
    2671             : 
    2672           2 :         next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
    2673           2 :         if (next_nvme_ctrlr == NULL) {
    2674           1 :                 goto complete;
    2675             :         }
    2676             : 
    2677           1 :         rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2678           1 :         if (rc == 0) {
    2679           1 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2680           1 :                 return;
    2681           0 :         } else if (rc == -EALREADY) {
    2682           0 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2683           0 :                 rc = 0;
    2684             :         }
    2685             : 
    2686           0 :         ctx->rc = rc;
    2687             : 
    2688           1 : complete:
    2689           1 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2690           1 :         free(ctx);
    2691             : }
    2692             : 
    2693             : static void
    2694           2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
    2695             : {
    2696           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2697             : 
    2698           2 :         ctx->rc = rc;
    2699             : 
    2700           2 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2701           2 : }
    2702             : 
    2703             : void
    2704           1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
    2705             :                        bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2706             : {
    2707             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2708             :         struct nvme_ctrlr *nvme_ctrlr;
    2709             :         int rc;
    2710             : 
    2711           1 :         assert(cb_fn != NULL);
    2712             : 
    2713           1 :         ctx = calloc(1, sizeof(*ctx));
    2714           1 :         if (ctx == NULL) {
    2715           0 :                 SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2716           0 :                 cb_fn(cb_arg, -ENOMEM);
    2717           0 :                 return;
    2718             :         }
    2719             : 
    2720           1 :         ctx->orig_thread = spdk_get_thread();
    2721           1 :         ctx->op = op;
    2722           1 :         ctx->cb_fn = cb_fn;
    2723           1 :         ctx->cb_arg = cb_arg;
    2724             : 
    2725           1 :         nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
    2726           1 :         assert(nvme_ctrlr != NULL);
    2727             : 
    2728           1 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2729           1 :         if (rc == 0) {
    2730           1 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2731           1 :                 return;
    2732           0 :         } else if (rc == -EALREADY) {
    2733           0 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2734           0 :                 rc = 0;
    2735             :         }
    2736             : 
    2737           0 :         nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
    2738             : }
    2739             : 
    2740             : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
    2741             : 
    2742             : static void
    2743           7 : _bdev_nvme_reset_io_complete(struct spdk_io_channel_iter *i, int status)
    2744             : {
    2745           7 :         struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
    2746             :         enum spdk_bdev_io_status io_status;
    2747             : 
    2748           7 :         if (bio->cpl.cdw0 == 0) {
    2749           5 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    2750             :         } else {
    2751           2 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    2752             :         }
    2753             : 
    2754           7 :         __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
    2755           7 : }
    2756             : 
    2757             : static void
    2758          14 : bdev_nvme_abort_bdev_channel(struct spdk_io_channel_iter *i)
    2759             : {
    2760          14 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    2761          14 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    2762             : 
    2763          14 :         bdev_nvme_abort_retry_ios(nbdev_ch);
    2764             : 
    2765          14 :         spdk_for_each_channel_continue(i, 0);
    2766          14 : }
    2767             : 
    2768             : static void
    2769           7 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
    2770             : {
    2771           7 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    2772           7 :         struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
    2773             : 
    2774             :         /* Abort all queued I/Os for retry. */
    2775           7 :         spdk_for_each_channel(nbdev,
    2776             :                               bdev_nvme_abort_bdev_channel,
    2777             :                               bio,
    2778             :                               _bdev_nvme_reset_io_complete);
    2779           7 : }
    2780             : 
    2781             : static void
    2782          10 : _bdev_nvme_reset_io_continue(void *ctx)
    2783             : {
    2784          10 :         struct nvme_bdev_io *bio = ctx;
    2785             :         struct nvme_io_path *prev_io_path, *next_io_path;
    2786             :         int rc;
    2787             : 
    2788          10 :         prev_io_path = bio->io_path;
    2789          10 :         bio->io_path = NULL;
    2790             : 
    2791          10 :         if (bio->cpl.cdw0 != 0) {
    2792           2 :                 goto complete;
    2793             :         }
    2794             : 
    2795           8 :         next_io_path = STAILQ_NEXT(prev_io_path, stailq);
    2796           8 :         if (next_io_path == NULL) {
    2797           5 :                 goto complete;
    2798             :         }
    2799             : 
    2800           3 :         rc = _bdev_nvme_reset_io(next_io_path, bio);
    2801           3 :         if (rc == 0) {
    2802           3 :                 return;
    2803             :         }
    2804             : 
    2805           0 :         bio->cpl.cdw0 = 1;
    2806             : 
    2807           7 : complete:
    2808           7 :         bdev_nvme_reset_io_complete(bio);
    2809             : }
    2810             : 
    2811             : static void
    2812          10 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
    2813             : {
    2814          10 :         struct nvme_bdev_io *bio = cb_arg;
    2815          10 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    2816             : 
    2817          10 :         bio->cpl.cdw0 = (rc == 0) ? 0 : 1;
    2818             : 
    2819          10 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
    2820          10 : }
    2821             : 
    2822             : static int
    2823          10 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
    2824             : {
    2825             :         struct nvme_ctrlr_channel *ctrlr_ch;
    2826             :         int rc;
    2827             : 
    2828          10 :         rc = nvme_ctrlr_op(io_path->qpair->ctrlr, NVME_CTRLR_OP_RESET,
    2829             :                            bdev_nvme_reset_io_continue, bio);
    2830          10 :         if (rc != 0 && rc != -EBUSY) {
    2831           0 :                 return rc;
    2832             :         }
    2833             : 
    2834          10 :         assert(bio->io_path == NULL);
    2835          10 :         bio->io_path = io_path;
    2836             : 
    2837          10 :         if (rc == -EBUSY) {
    2838           4 :                 ctrlr_ch = io_path->qpair->ctrlr_ch;
    2839           4 :                 assert(ctrlr_ch != NULL);
    2840             :                 /*
    2841             :                  * Reset call is queued only if it is from the app framework. This is on purpose so that
    2842             :                  * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
    2843             :                  * upper level. If they are in the middle of a reset, we won't try to schedule another one.
    2844             :                  */
    2845           4 :                 TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bio, retry_link);
    2846             :         }
    2847             : 
    2848          10 :         return 0;
    2849             : }
    2850             : 
    2851             : static void
    2852           7 : bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio)
    2853             : {
    2854             :         struct nvme_io_path *io_path;
    2855             :         int rc;
    2856             : 
    2857           7 :         bio->cpl.cdw0 = 0;
    2858             : 
    2859             :         /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
    2860           7 :         io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
    2861           7 :         assert(io_path != NULL);
    2862             : 
    2863           7 :         rc = _bdev_nvme_reset_io(io_path, bio);
    2864           7 :         if (rc != 0) {
    2865             :                 /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
    2866           0 :                 rc = (rc == -EALREADY) ? 0 : rc;
    2867             : 
    2868           0 :                 bdev_nvme_reset_io_continue(bio, rc);
    2869             :         }
    2870           7 : }
    2871             : 
    2872             : static int
    2873          18 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
    2874             : {
    2875          18 :         if (nvme_ctrlr->destruct) {
    2876             :                 /* Don't bother resetting if the controller is in the process of being destructed. */
    2877           2 :                 return -ENXIO;
    2878             :         }
    2879             : 
    2880          16 :         if (nvme_ctrlr->resetting) {
    2881           3 :                 if (!nvme_ctrlr->in_failover) {
    2882           3 :                         SPDK_NOTICELOG("Reset is already in progress. Defer failover until reset completes.\n");
    2883             : 
    2884             :                         /* Defer failover until reset completes. */
    2885           3 :                         nvme_ctrlr->pending_failover = true;
    2886           3 :                         return -EINPROGRESS;
    2887             :                 } else {
    2888           0 :                         SPDK_NOTICELOG("Unable to perform failover, already in progress.\n");
    2889           0 :                         return -EBUSY;
    2890             :                 }
    2891             :         }
    2892             : 
    2893          13 :         bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
    2894             : 
    2895          13 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2896           1 :                 SPDK_NOTICELOG("Reconnect is already scheduled.\n");
    2897             : 
    2898             :                 /* We rely on the next reconnect for the failover. */
    2899           1 :                 return -EALREADY;
    2900             :         }
    2901             : 
    2902          12 :         if (nvme_ctrlr->disabled) {
    2903           0 :                 SPDK_NOTICELOG("Controller is disabled.\n");
    2904             : 
    2905             :                 /* We rely on the enablement for the failover. */
    2906           0 :                 return -EALREADY;
    2907             :         }
    2908             : 
    2909          12 :         nvme_ctrlr->resetting = true;
    2910          12 :         nvme_ctrlr->in_failover = true;
    2911             : 
    2912          12 :         assert(nvme_ctrlr->reset_start_tsc == 0);
    2913          12 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2914             : 
    2915          12 :         return 0;
    2916             : }
    2917             : 
    2918             : static int
    2919          16 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2920             : {
    2921             :         int rc;
    2922             : 
    2923          16 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2924          16 :         rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
    2925          16 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2926             : 
    2927          16 :         if (rc == 0) {
    2928          11 :                 spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
    2929           5 :         } else if (rc == -EALREADY) {
    2930           0 :                 rc = 0;
    2931             :         }
    2932             : 
    2933          16 :         return rc;
    2934             : }
    2935             : 
    2936             : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    2937             :                            uint64_t num_blocks);
    2938             : 
    2939             : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    2940             :                                   uint64_t num_blocks);
    2941             : 
    2942             : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
    2943             :                           uint64_t src_offset_blocks,
    2944             :                           uint64_t num_blocks);
    2945             : 
    2946             : static void
    2947           1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
    2948             :                      bool success)
    2949             : {
    2950           1 :         struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    2951             :         int ret;
    2952             : 
    2953           1 :         if (!success) {
    2954           0 :                 ret = -EINVAL;
    2955           0 :                 goto exit;
    2956             :         }
    2957             : 
    2958           1 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    2959           0 :                 ret = -ENXIO;
    2960           0 :                 goto exit;
    2961             :         }
    2962             : 
    2963           1 :         ret = bdev_nvme_readv(bio,
    2964             :                               bdev_io->u.bdev.iovs,
    2965             :                               bdev_io->u.bdev.iovcnt,
    2966             :                               bdev_io->u.bdev.md_buf,
    2967             :                               bdev_io->u.bdev.num_blocks,
    2968             :                               bdev_io->u.bdev.offset_blocks,
    2969             :                               bdev_io->u.bdev.dif_check_flags,
    2970             :                               bdev_io->u.bdev.memory_domain,
    2971             :                               bdev_io->u.bdev.memory_domain_ctx,
    2972             :                               bdev_io->u.bdev.accel_sequence);
    2973             : 
    2974           1 : exit:
    2975           1 :         if (spdk_unlikely(ret != 0)) {
    2976           0 :                 bdev_nvme_io_complete(bio, ret);
    2977             :         }
    2978           1 : }
    2979             : 
    2980             : static inline void
    2981          51 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    2982             : {
    2983          51 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    2984          51 :         struct spdk_bdev *bdev = bdev_io->bdev;
    2985             :         struct nvme_bdev_io *nbdev_io_to_abort;
    2986          51 :         int rc = 0;
    2987             : 
    2988          51 :         switch (bdev_io->type) {
    2989           3 :         case SPDK_BDEV_IO_TYPE_READ:
    2990           3 :                 if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
    2991             : 
    2992           2 :                         rc = bdev_nvme_readv(nbdev_io,
    2993             :                                              bdev_io->u.bdev.iovs,
    2994             :                                              bdev_io->u.bdev.iovcnt,
    2995             :                                              bdev_io->u.bdev.md_buf,
    2996             :                                              bdev_io->u.bdev.num_blocks,
    2997             :                                              bdev_io->u.bdev.offset_blocks,
    2998             :                                              bdev_io->u.bdev.dif_check_flags,
    2999             :                                              bdev_io->u.bdev.memory_domain,
    3000             :                                              bdev_io->u.bdev.memory_domain_ctx,
    3001             :                                              bdev_io->u.bdev.accel_sequence);
    3002             :                 } else {
    3003           1 :                         spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
    3004           1 :                                              bdev_io->u.bdev.num_blocks * bdev->blocklen);
    3005           1 :                         rc = 0;
    3006             :                 }
    3007           3 :                 break;
    3008          25 :         case SPDK_BDEV_IO_TYPE_WRITE:
    3009          25 :                 rc = bdev_nvme_writev(nbdev_io,
    3010             :                                       bdev_io->u.bdev.iovs,
    3011             :                                       bdev_io->u.bdev.iovcnt,
    3012             :                                       bdev_io->u.bdev.md_buf,
    3013             :                                       bdev_io->u.bdev.num_blocks,
    3014             :                                       bdev_io->u.bdev.offset_blocks,
    3015             :                                       bdev_io->u.bdev.dif_check_flags,
    3016             :                                       bdev_io->u.bdev.memory_domain,
    3017             :                                       bdev_io->u.bdev.memory_domain_ctx,
    3018             :                                       bdev_io->u.bdev.accel_sequence,
    3019             :                                       bdev_io->u.bdev.nvme_cdw12,
    3020             :                                       bdev_io->u.bdev.nvme_cdw13);
    3021          25 :                 break;
    3022           1 :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3023           1 :                 rc = bdev_nvme_comparev(nbdev_io,
    3024             :                                         bdev_io->u.bdev.iovs,
    3025             :                                         bdev_io->u.bdev.iovcnt,
    3026             :                                         bdev_io->u.bdev.md_buf,
    3027             :                                         bdev_io->u.bdev.num_blocks,
    3028             :                                         bdev_io->u.bdev.offset_blocks,
    3029             :                                         bdev_io->u.bdev.dif_check_flags);
    3030           1 :                 break;
    3031           2 :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3032           2 :                 rc = bdev_nvme_comparev_and_writev(nbdev_io,
    3033             :                                                    bdev_io->u.bdev.iovs,
    3034             :                                                    bdev_io->u.bdev.iovcnt,
    3035             :                                                    bdev_io->u.bdev.fused_iovs,
    3036             :                                                    bdev_io->u.bdev.fused_iovcnt,
    3037             :                                                    bdev_io->u.bdev.md_buf,
    3038             :                                                    bdev_io->u.bdev.num_blocks,
    3039             :                                                    bdev_io->u.bdev.offset_blocks,
    3040             :                                                    bdev_io->u.bdev.dif_check_flags);
    3041           2 :                 break;
    3042           1 :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3043           1 :                 rc = bdev_nvme_unmap(nbdev_io,
    3044             :                                      bdev_io->u.bdev.offset_blocks,
    3045             :                                      bdev_io->u.bdev.num_blocks);
    3046           1 :                 break;
    3047           0 :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3048           0 :                 rc =  bdev_nvme_write_zeroes(nbdev_io,
    3049             :                                              bdev_io->u.bdev.offset_blocks,
    3050             :                                              bdev_io->u.bdev.num_blocks);
    3051           0 :                 break;
    3052           7 :         case SPDK_BDEV_IO_TYPE_RESET:
    3053           7 :                 nbdev_io->io_path = NULL;
    3054           7 :                 bdev_nvme_reset_io(nbdev_ch, nbdev_io);
    3055           7 :                 return;
    3056             : 
    3057           1 :         case SPDK_BDEV_IO_TYPE_FLUSH:
    3058           1 :                 bdev_nvme_io_complete(nbdev_io, 0);
    3059           1 :                 return;
    3060             : 
    3061           0 :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3062           0 :                 rc = bdev_nvme_zone_appendv(nbdev_io,
    3063             :                                             bdev_io->u.bdev.iovs,
    3064             :                                             bdev_io->u.bdev.iovcnt,
    3065             :                                             bdev_io->u.bdev.md_buf,
    3066             :                                             bdev_io->u.bdev.num_blocks,
    3067             :                                             bdev_io->u.bdev.offset_blocks,
    3068             :                                             bdev_io->u.bdev.dif_check_flags);
    3069           0 :                 break;
    3070           0 :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3071           0 :                 rc = bdev_nvme_get_zone_info(nbdev_io,
    3072             :                                              bdev_io->u.zone_mgmt.zone_id,
    3073             :                                              bdev_io->u.zone_mgmt.num_zones,
    3074           0 :                                              bdev_io->u.zone_mgmt.buf);
    3075           0 :                 break;
    3076           0 :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3077           0 :                 rc = bdev_nvme_zone_management(nbdev_io,
    3078             :                                                bdev_io->u.zone_mgmt.zone_id,
    3079             :                                                bdev_io->u.zone_mgmt.zone_action);
    3080           0 :                 break;
    3081           5 :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3082           5 :                 nbdev_io->io_path = NULL;
    3083           5 :                 bdev_nvme_admin_passthru(nbdev_ch,
    3084             :                                          nbdev_io,
    3085             :                                          &bdev_io->u.nvme_passthru.cmd,
    3086             :                                          bdev_io->u.nvme_passthru.buf,
    3087             :                                          bdev_io->u.nvme_passthru.nbytes);
    3088           5 :                 return;
    3089             : 
    3090           0 :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3091           0 :                 rc = bdev_nvme_io_passthru(nbdev_io,
    3092             :                                            &bdev_io->u.nvme_passthru.cmd,
    3093             :                                            bdev_io->u.nvme_passthru.buf,
    3094             :                                            bdev_io->u.nvme_passthru.nbytes);
    3095           0 :                 break;
    3096           0 :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3097           0 :                 rc = bdev_nvme_io_passthru_md(nbdev_io,
    3098             :                                               &bdev_io->u.nvme_passthru.cmd,
    3099             :                                               bdev_io->u.nvme_passthru.buf,
    3100             :                                               bdev_io->u.nvme_passthru.nbytes,
    3101             :                                               bdev_io->u.nvme_passthru.md_buf,
    3102             :                                               bdev_io->u.nvme_passthru.md_len);
    3103           0 :                 break;
    3104           0 :         case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
    3105           0 :                 rc = bdev_nvme_iov_passthru_md(nbdev_io,
    3106             :                                                &bdev_io->u.nvme_passthru.cmd,
    3107             :                                                bdev_io->u.nvme_passthru.iovs,
    3108             :                                                bdev_io->u.nvme_passthru.iovcnt,
    3109             :                                                bdev_io->u.nvme_passthru.nbytes,
    3110             :                                                bdev_io->u.nvme_passthru.md_buf,
    3111             :                                                bdev_io->u.nvme_passthru.md_len);
    3112           0 :                 break;
    3113           6 :         case SPDK_BDEV_IO_TYPE_ABORT:
    3114           6 :                 nbdev_io->io_path = NULL;
    3115           6 :                 nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
    3116           6 :                 bdev_nvme_abort(nbdev_ch,
    3117             :                                 nbdev_io,
    3118             :                                 nbdev_io_to_abort);
    3119           6 :                 return;
    3120             : 
    3121           0 :         case SPDK_BDEV_IO_TYPE_COPY:
    3122           0 :                 rc = bdev_nvme_copy(nbdev_io,
    3123             :                                     bdev_io->u.bdev.offset_blocks,
    3124             :                                     bdev_io->u.bdev.copy.src_offset_blocks,
    3125             :                                     bdev_io->u.bdev.num_blocks);
    3126           0 :                 break;
    3127           0 :         default:
    3128           0 :                 rc = -EINVAL;
    3129           0 :                 break;
    3130             :         }
    3131             : 
    3132          32 :         if (spdk_unlikely(rc != 0)) {
    3133           0 :                 bdev_nvme_io_complete(nbdev_io, rc);
    3134             :         }
    3135             : }
    3136             : 
    3137             : static void
    3138          58 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
    3139             : {
    3140          58 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    3141          58 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3142             : 
    3143          58 :         if (spdk_likely(nbdev_io->submit_tsc == 0)) {
    3144          58 :                 nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
    3145             :         } else {
    3146             :                 /* There are cases where submit_tsc != 0, i.e. retry I/O.
    3147             :                  * We need to update submit_tsc here.
    3148             :                  */
    3149           0 :                 nbdev_io->submit_tsc = spdk_get_ticks();
    3150             :         }
    3151             : 
    3152          58 :         spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
    3153          58 :         nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
    3154          58 :         if (spdk_unlikely(!nbdev_io->io_path)) {
    3155          11 :                 if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
    3156          10 :                         bdev_nvme_io_complete(nbdev_io, -ENXIO);
    3157          10 :                         return;
    3158             :                 }
    3159             : 
    3160             :                 /* Admin commands do not use the optimal I/O path.
    3161             :                  * Simply fall through even if it is not found.
    3162             :                  */
    3163             :         }
    3164             : 
    3165          48 :         _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    3166             : }
    3167             : 
    3168             : static bool
    3169           0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
    3170             : {
    3171           0 :         struct nvme_bdev *nbdev = ctx;
    3172             :         struct nvme_ns *nvme_ns;
    3173             :         struct spdk_nvme_ns *ns;
    3174             :         struct spdk_nvme_ctrlr *ctrlr;
    3175             :         const struct spdk_nvme_ctrlr_data *cdata;
    3176             : 
    3177           0 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    3178           0 :         assert(nvme_ns != NULL);
    3179           0 :         ns = nvme_ns->ns;
    3180           0 :         if (ns == NULL) {
    3181           0 :                 return false;
    3182             :         }
    3183             : 
    3184           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    3185             : 
    3186           0 :         switch (io_type) {
    3187           0 :         case SPDK_BDEV_IO_TYPE_READ:
    3188             :         case SPDK_BDEV_IO_TYPE_WRITE:
    3189             :         case SPDK_BDEV_IO_TYPE_RESET:
    3190             :         case SPDK_BDEV_IO_TYPE_FLUSH:
    3191             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3192             :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3193             :         case SPDK_BDEV_IO_TYPE_ABORT:
    3194           0 :                 return true;
    3195             : 
    3196           0 :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3197           0 :                 return spdk_nvme_ns_supports_compare(ns);
    3198             : 
    3199           0 :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3200           0 :                 return spdk_nvme_ns_get_md_size(ns) ? true : false;
    3201             : 
    3202           0 :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3203           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3204           0 :                 return cdata->oncs.dsm;
    3205             : 
    3206           0 :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3207           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3208           0 :                 return cdata->oncs.write_zeroes;
    3209             : 
    3210           0 :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3211           0 :                 if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    3212             :                     SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
    3213           0 :                         return true;
    3214             :                 }
    3215           0 :                 return false;
    3216             : 
    3217           0 :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3218             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3219           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
    3220             : 
    3221           0 :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3222           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
    3223           0 :                        spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
    3224             : 
    3225           0 :         case SPDK_BDEV_IO_TYPE_COPY:
    3226           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3227           0 :                 return cdata->oncs.copy;
    3228             : 
    3229           0 :         default:
    3230           0 :                 return false;
    3231             :         }
    3232             : }
    3233             : 
    3234             : static int
    3235          57 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
    3236             : {
    3237             :         struct nvme_qpair *nvme_qpair;
    3238             :         struct spdk_io_channel *pg_ch;
    3239             :         int rc;
    3240             : 
    3241          57 :         nvme_qpair = calloc(1, sizeof(*nvme_qpair));
    3242          57 :         if (!nvme_qpair) {
    3243           0 :                 SPDK_ERRLOG("Failed to alloc nvme_qpair.\n");
    3244           0 :                 return -1;
    3245             :         }
    3246             : 
    3247          57 :         TAILQ_INIT(&nvme_qpair->io_path_list);
    3248             : 
    3249          57 :         nvme_qpair->ctrlr = nvme_ctrlr;
    3250          57 :         nvme_qpair->ctrlr_ch = ctrlr_ch;
    3251             : 
    3252          57 :         pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
    3253          57 :         if (!pg_ch) {
    3254           0 :                 free(nvme_qpair);
    3255           0 :                 return -1;
    3256             :         }
    3257             : 
    3258          57 :         nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
    3259             : 
    3260             : #ifdef SPDK_CONFIG_VTUNE
    3261             :         nvme_qpair->group->collect_spin_stat = true;
    3262             : #else
    3263          57 :         nvme_qpair->group->collect_spin_stat = false;
    3264             : #endif
    3265             : 
    3266          57 :         if (!nvme_ctrlr->disabled) {
    3267             :                 /* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will
    3268             :                  * be created when it's enabled.
    3269             :                  */
    3270          57 :                 rc = bdev_nvme_create_qpair(nvme_qpair);
    3271          57 :                 if (rc != 0) {
    3272             :                         /* nvme_ctrlr can't create IO qpair if connection is down.
    3273             :                          * If reconnect_delay_sec is non-zero, creating IO qpair is retried
    3274             :                          * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
    3275             :                          * submitted IO will be queued until IO qpair is successfully created.
    3276             :                          *
    3277             :                          * Hence, if both are satisfied, ignore the failure.
    3278             :                          */
    3279           0 :                         if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
    3280           0 :                                 spdk_put_io_channel(pg_ch);
    3281           0 :                                 free(nvme_qpair);
    3282           0 :                                 return rc;
    3283             :                         }
    3284             :                 }
    3285             :         }
    3286             : 
    3287          57 :         TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3288             : 
    3289          57 :         ctrlr_ch->qpair = nvme_qpair;
    3290             : 
    3291          57 :         pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
    3292          57 :         nvme_qpair->ctrlr->ref++;
    3293          57 :         pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);
    3294             : 
    3295          57 :         return 0;
    3296             : }
    3297             : 
    3298             : static int
    3299          57 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3300             : {
    3301          57 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
    3302          57 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3303             : 
    3304          57 :         TAILQ_INIT(&ctrlr_ch->pending_resets);
    3305             : 
    3306          57 :         return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
    3307             : }
    3308             : 
    3309             : static void
    3310          57 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
    3311             : {
    3312             :         struct nvme_io_path *io_path, *next;
    3313             : 
    3314          57 :         assert(nvme_qpair->group != NULL);
    3315             : 
    3316          92 :         TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
    3317          35 :                 TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
    3318          35 :                 nvme_io_path_free(io_path);
    3319             :         }
    3320             : 
    3321          57 :         TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3322             : 
    3323          57 :         spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
    3324             : 
    3325          57 :         nvme_ctrlr_release(nvme_qpair->ctrlr);
    3326             : 
    3327          57 :         free(nvme_qpair);
    3328          57 : }
    3329             : 
    3330             : static void
    3331          57 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3332             : {
    3333          57 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3334             :         struct nvme_qpair *nvme_qpair;
    3335             : 
    3336          57 :         nvme_qpair = ctrlr_ch->qpair;
    3337          57 :         assert(nvme_qpair != NULL);
    3338             : 
    3339          57 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    3340             : 
    3341          57 :         if (nvme_qpair->qpair != NULL) {
    3342          43 :                 if (ctrlr_ch->reset_iter == NULL) {
    3343          43 :                         spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
    3344             :                 } else {
    3345             :                         /* Skip current ctrlr_channel in a full reset sequence because
    3346             :                          * it is being deleted now. The qpair is already being disconnected.
    3347             :                          * We do not have to restart disconnecting it.
    3348             :                          */
    3349           0 :                         spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    3350             :                 }
    3351             : 
    3352             :                 /* We cannot release a reference to the poll group now.
    3353             :                  * The qpair may be disconnected asynchronously later.
    3354             :                  * We need to poll it until it is actually disconnected.
    3355             :                  * Just detach the qpair from the deleting ctrlr_channel.
    3356             :                  */
    3357          43 :                 nvme_qpair->ctrlr_ch = NULL;
    3358             :         } else {
    3359          14 :                 assert(ctrlr_ch->reset_iter == NULL);
    3360             : 
    3361          14 :                 nvme_qpair_delete(nvme_qpair);
    3362             :         }
    3363          57 : }
    3364             : 
    3365             : static inline struct spdk_io_channel *
    3366           0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
    3367             : {
    3368           0 :         if (spdk_unlikely(!group->accel_channel)) {
    3369           0 :                 group->accel_channel = spdk_accel_get_io_channel();
    3370           0 :                 if (!group->accel_channel) {
    3371           0 :                         SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
    3372             :                                     group);
    3373           0 :                         return NULL;
    3374             :                 }
    3375             :         }
    3376             : 
    3377           0 :         return group->accel_channel;
    3378             : }
    3379             : 
    3380             : static void
    3381           0 : bdev_nvme_submit_accel_crc32c(void *ctx, uint32_t *dst, struct iovec *iov,
    3382             :                               uint32_t iov_cnt, uint32_t seed,
    3383             :                               spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
    3384             : {
    3385             :         struct spdk_io_channel *accel_ch;
    3386           0 :         struct nvme_poll_group *group = ctx;
    3387             :         int rc;
    3388             : 
    3389           0 :         assert(cb_fn != NULL);
    3390             : 
    3391           0 :         accel_ch = bdev_nvme_get_accel_channel(group);
    3392           0 :         if (spdk_unlikely(accel_ch == NULL)) {
    3393           0 :                 cb_fn(cb_arg, -ENOMEM);
    3394           0 :                 return;
    3395             :         }
    3396             : 
    3397           0 :         rc = spdk_accel_submit_crc32cv(accel_ch, dst, iov, iov_cnt, seed, cb_fn, cb_arg);
    3398           0 :         if (rc) {
    3399             :                 /* For the two cases, spdk_accel_submit_crc32cv does not call the user's cb_fn */
    3400           0 :                 if (rc == -ENOMEM || rc == -EINVAL) {
    3401           0 :                         cb_fn(cb_arg, rc);
    3402             :                 }
    3403           0 :                 SPDK_ERRLOG("Cannot complete the accelerated crc32c operation with iov=%p\n", iov);
    3404             :         }
    3405             : }
    3406             : 
    3407             : static void
    3408           0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
    3409             : {
    3410           0 :         spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
    3411           0 : }
    3412             : 
    3413             : static void
    3414           0 : bdev_nvme_abort_sequence(void *seq)
    3415             : {
    3416           0 :         spdk_accel_sequence_abort(seq);
    3417           0 : }
    3418             : 
    3419             : static void
    3420           0 : bdev_nvme_reverse_sequence(void *seq)
    3421             : {
    3422           0 :         spdk_accel_sequence_reverse(seq);
    3423           0 : }
    3424             : 
    3425             : static int
    3426           0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
    3427             :                         struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
    3428             :                         spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
    3429             : {
    3430             :         struct spdk_io_channel *ch;
    3431           0 :         struct nvme_poll_group *group = ctx;
    3432             : 
    3433           0 :         ch = bdev_nvme_get_accel_channel(group);
    3434           0 :         if (spdk_unlikely(ch == NULL)) {
    3435           0 :                 return -ENOMEM;
    3436             :         }
    3437             : 
    3438           0 :         return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
    3439             :                                         domain, domain_ctx, seed, cb_fn, cb_arg);
    3440             : }
    3441             : 
    3442             : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
    3443             :         .table_size             = sizeof(struct spdk_nvme_accel_fn_table),
    3444             :         .submit_accel_crc32c    = bdev_nvme_submit_accel_crc32c,
    3445             :         .append_crc32c          = bdev_nvme_append_crc32c,
    3446             :         .finish_sequence        = bdev_nvme_finish_sequence,
    3447             :         .reverse_sequence       = bdev_nvme_reverse_sequence,
    3448             :         .abort_sequence         = bdev_nvme_abort_sequence,
    3449             : };
    3450             : 
    3451             : static int
    3452          42 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
    3453             : {
    3454          42 :         struct nvme_poll_group *group = ctx_buf;
    3455             : 
    3456          42 :         TAILQ_INIT(&group->qpair_list);
    3457             : 
    3458          42 :         group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
    3459          42 :         if (group->group == NULL) {
    3460           0 :                 return -1;
    3461             :         }
    3462             : 
    3463          42 :         group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
    3464             : 
    3465          42 :         if (group->poller == NULL) {
    3466           0 :                 spdk_nvme_poll_group_destroy(group->group);
    3467           0 :                 return -1;
    3468             :         }
    3469             : 
    3470          42 :         return 0;
    3471             : }
    3472             : 
    3473             : static void
    3474          42 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
    3475             : {
    3476          42 :         struct nvme_poll_group *group = ctx_buf;
    3477             : 
    3478          42 :         assert(TAILQ_EMPTY(&group->qpair_list));
    3479             : 
    3480          42 :         if (group->accel_channel) {
    3481           0 :                 spdk_put_io_channel(group->accel_channel);
    3482             :         }
    3483             : 
    3484          42 :         spdk_poller_unregister(&group->poller);
    3485          42 :         if (spdk_nvme_poll_group_destroy(group->group)) {
    3486           0 :                 SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
    3487           0 :                 assert(false);
    3488             :         }
    3489          42 : }
    3490             : 
    3491             : static struct spdk_io_channel *
    3492           0 : bdev_nvme_get_io_channel(void *ctx)
    3493             : {
    3494           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3495             : 
    3496           0 :         return spdk_get_io_channel(nvme_bdev);
    3497             : }
    3498             : 
    3499             : static void *
    3500           0 : bdev_nvme_get_module_ctx(void *ctx)
    3501             : {
    3502           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3503             :         struct nvme_ns *nvme_ns;
    3504             : 
    3505           0 :         if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
    3506           0 :                 return NULL;
    3507             :         }
    3508             : 
    3509           0 :         nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
    3510           0 :         if (!nvme_ns) {
    3511           0 :                 return NULL;
    3512             :         }
    3513             : 
    3514           0 :         return nvme_ns->ns;
    3515             : }
    3516             : 
    3517             : static const char *
    3518           0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
    3519             : {
    3520           0 :         switch (ana_state) {
    3521           0 :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    3522           0 :                 return "optimized";
    3523           0 :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    3524           0 :                 return "non_optimized";
    3525           0 :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    3526           0 :                 return "inaccessible";
    3527           0 :         case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
    3528           0 :                 return "persistent_loss";
    3529           0 :         case SPDK_NVME_ANA_CHANGE_STATE:
    3530           0 :                 return "change";
    3531           0 :         default:
    3532           0 :                 return NULL;
    3533             :         }
    3534             : }
    3535             : 
    3536             : static int
    3537           8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
    3538             : {
    3539           8 :         struct spdk_memory_domain **_domains = NULL;
    3540           8 :         struct nvme_bdev *nbdev = ctx;
    3541             :         struct nvme_ns *nvme_ns;
    3542           8 :         int i = 0, _array_size = array_size;
    3543           8 :         int rc = 0;
    3544             : 
    3545          22 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    3546          14 :                 if (domains && array_size >= i) {
    3547          11 :                         _domains = &domains[i];
    3548             :                 } else {
    3549           3 :                         _domains = NULL;
    3550             :                 }
    3551          14 :                 rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
    3552          14 :                 if (rc > 0) {
    3553          13 :                         i += rc;
    3554          13 :                         if (_array_size >= rc) {
    3555           9 :                                 _array_size -= rc;
    3556             :                         } else {
    3557           4 :                                 _array_size = 0;
    3558             :                         }
    3559           1 :                 } else if (rc < 0) {
    3560           0 :                         return rc;
    3561             :                 }
    3562             :         }
    3563             : 
    3564           8 :         return i;
    3565             : }
    3566             : 
    3567             : static const char *
    3568           0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
    3569             : {
    3570           0 :         if (nvme_ctrlr->destruct) {
    3571           0 :                 return "deleting";
    3572           0 :         } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    3573           0 :                 return "failed";
    3574           0 :         } else if (nvme_ctrlr->resetting) {
    3575           0 :                 return "resetting";
    3576           0 :         } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
    3577           0 :                 return "reconnect_is_delayed";
    3578           0 :         } else if (nvme_ctrlr->disabled) {
    3579           0 :                 return "disabled";
    3580             :         } else {
    3581           0 :                 return "enabled";
    3582             :         }
    3583             : }
    3584             : 
    3585             : void
    3586           0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
    3587           0 : {
    3588             :         struct spdk_nvme_transport_id *trid;
    3589             :         const struct spdk_nvme_ctrlr_opts *opts;
    3590             :         const struct spdk_nvme_ctrlr_data *cdata;
    3591             :         struct nvme_path_id *path_id;
    3592             : 
    3593           0 :         spdk_json_write_object_begin(w);
    3594             : 
    3595           0 :         spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
    3596             : 
    3597             : #ifdef SPDK_CONFIG_NVME_CUSE
    3598           0 :         size_t cuse_name_size = 128;
    3599           0 :         char cuse_name[cuse_name_size];
    3600             : 
    3601           0 :         int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
    3602           0 :         if (rc == 0) {
    3603           0 :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    3604             :         }
    3605             : #endif
    3606           0 :         trid = &nvme_ctrlr->active_path_id->trid;
    3607           0 :         spdk_json_write_named_object_begin(w, "trid");
    3608           0 :         nvme_bdev_dump_trid_json(trid, w);
    3609           0 :         spdk_json_write_object_end(w);
    3610             : 
    3611           0 :         path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
    3612           0 :         if (path_id != NULL) {
    3613           0 :                 spdk_json_write_named_array_begin(w, "alternate_trids");
    3614             :                 do {
    3615           0 :                         trid = &path_id->trid;
    3616           0 :                         spdk_json_write_object_begin(w);
    3617           0 :                         nvme_bdev_dump_trid_json(trid, w);
    3618           0 :                         spdk_json_write_object_end(w);
    3619             : 
    3620           0 :                         path_id = TAILQ_NEXT(path_id, link);
    3621           0 :                 } while (path_id != NULL);
    3622           0 :                 spdk_json_write_array_end(w);
    3623             :         }
    3624             : 
    3625           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    3626           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    3627             : 
    3628           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    3629           0 :         spdk_json_write_named_object_begin(w, "host");
    3630           0 :         spdk_json_write_named_string(w, "nqn", opts->hostnqn);
    3631           0 :         spdk_json_write_named_string(w, "addr", opts->src_addr);
    3632           0 :         spdk_json_write_named_string(w, "svcid", opts->src_svcid);
    3633           0 :         spdk_json_write_object_end(w);
    3634             : 
    3635           0 :         spdk_json_write_object_end(w);
    3636           0 : }
    3637             : 
    3638             : static void
    3639           0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
    3640             :                          struct nvme_ns *nvme_ns)
    3641           0 : {
    3642             :         struct spdk_nvme_ns *ns;
    3643             :         struct spdk_nvme_ctrlr *ctrlr;
    3644             :         const struct spdk_nvme_ctrlr_data *cdata;
    3645             :         const struct spdk_nvme_transport_id *trid;
    3646             :         union spdk_nvme_vs_register vs;
    3647             :         const struct spdk_nvme_ns_data *nsdata;
    3648           0 :         char buf[128];
    3649             : 
    3650           0 :         ns = nvme_ns->ns;
    3651           0 :         if (ns == NULL) {
    3652           0 :                 return;
    3653             :         }
    3654             : 
    3655           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    3656             : 
    3657           0 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3658           0 :         trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
    3659           0 :         vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
    3660             : 
    3661           0 :         spdk_json_write_object_begin(w);
    3662             : 
    3663           0 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    3664           0 :                 spdk_json_write_named_string(w, "pci_address", trid->traddr);
    3665             :         }
    3666             : 
    3667           0 :         spdk_json_write_named_object_begin(w, "trid");
    3668             : 
    3669           0 :         nvme_bdev_dump_trid_json(trid, w);
    3670             : 
    3671           0 :         spdk_json_write_object_end(w);
    3672             : 
    3673             : #ifdef SPDK_CONFIG_NVME_CUSE
    3674           0 :         size_t cuse_name_size = 128;
    3675           0 :         char cuse_name[cuse_name_size];
    3676             : 
    3677           0 :         int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
    3678             :                                             cuse_name, &cuse_name_size);
    3679           0 :         if (rc == 0) {
    3680           0 :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    3681             :         }
    3682             : #endif
    3683             : 
    3684           0 :         spdk_json_write_named_object_begin(w, "ctrlr_data");
    3685             : 
    3686           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    3687             : 
    3688           0 :         spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
    3689             : 
    3690           0 :         snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
    3691           0 :         spdk_str_trim(buf);
    3692           0 :         spdk_json_write_named_string(w, "model_number", buf);
    3693             : 
    3694           0 :         snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
    3695           0 :         spdk_str_trim(buf);
    3696           0 :         spdk_json_write_named_string(w, "serial_number", buf);
    3697             : 
    3698           0 :         snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
    3699           0 :         spdk_str_trim(buf);
    3700           0 :         spdk_json_write_named_string(w, "firmware_revision", buf);
    3701             : 
    3702           0 :         if (cdata->subnqn[0] != '\0') {
    3703           0 :                 spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
    3704             :         }
    3705             : 
    3706           0 :         spdk_json_write_named_object_begin(w, "oacs");
    3707             : 
    3708           0 :         spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
    3709           0 :         spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
    3710           0 :         spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
    3711           0 :         spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
    3712             : 
    3713           0 :         spdk_json_write_object_end(w);
    3714             : 
    3715           0 :         spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
    3716           0 :         spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
    3717             : 
    3718           0 :         spdk_json_write_object_end(w);
    3719             : 
    3720           0 :         spdk_json_write_named_object_begin(w, "vs");
    3721             : 
    3722           0 :         spdk_json_write_name(w, "nvme_version");
    3723           0 :         if (vs.bits.ter) {
    3724           0 :                 spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
    3725             :         } else {
    3726           0 :                 spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
    3727             :         }
    3728             : 
    3729           0 :         spdk_json_write_object_end(w);
    3730             : 
    3731           0 :         nsdata = spdk_nvme_ns_get_data(ns);
    3732             : 
    3733           0 :         spdk_json_write_named_object_begin(w, "ns_data");
    3734             : 
    3735           0 :         spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
    3736             : 
    3737           0 :         if (cdata->cmic.ana_reporting) {
    3738           0 :                 spdk_json_write_named_string(w, "ana_state",
    3739             :                                              _nvme_ana_state_str(nvme_ns->ana_state));
    3740             :         }
    3741             : 
    3742           0 :         spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
    3743             : 
    3744           0 :         spdk_json_write_object_end(w);
    3745             : 
    3746           0 :         if (cdata->oacs.security) {
    3747           0 :                 spdk_json_write_named_object_begin(w, "security");
    3748             : 
    3749           0 :                 spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
    3750             : 
    3751           0 :                 spdk_json_write_object_end(w);
    3752             :         }
    3753             : 
    3754           0 :         spdk_json_write_object_end(w);
    3755             : }
    3756             : 
    3757             : static const char *
    3758           0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
    3759             : {
    3760           0 :         switch (nbdev->mp_policy) {
    3761           0 :         case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
    3762           0 :                 return "active_passive";
    3763           0 :         case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
    3764           0 :                 return "active_active";
    3765           0 :         default:
    3766           0 :                 assert(false);
    3767             :                 return "invalid";
    3768             :         }
    3769             : }
    3770             : 
    3771             : static const char *
    3772           0 : nvme_bdev_get_mp_selector_str(struct nvme_bdev *nbdev)
    3773             : {
    3774           0 :         switch (nbdev->mp_selector) {
    3775           0 :         case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
    3776           0 :                 return "round_robin";
    3777           0 :         case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
    3778           0 :                 return "queue_depth";
    3779           0 :         default:
    3780           0 :                 assert(false);
    3781             :                 return "invalid";
    3782             :         }
    3783             : }
    3784             : 
    3785             : static int
    3786           0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
    3787             : {
    3788           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3789             :         struct nvme_ns *nvme_ns;
    3790             : 
    3791           0 :         pthread_mutex_lock(&nvme_bdev->mutex);
    3792           0 :         spdk_json_write_named_array_begin(w, "nvme");
    3793           0 :         TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
    3794           0 :                 nvme_namespace_info_json(w, nvme_ns);
    3795             :         }
    3796           0 :         spdk_json_write_array_end(w);
    3797           0 :         spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
    3798           0 :         if (nvme_bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    3799           0 :                 spdk_json_write_named_string(w, "selector", nvme_bdev_get_mp_selector_str(nvme_bdev));
    3800           0 :                 if (nvme_bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    3801           0 :                         spdk_json_write_named_uint32(w, "rr_min_io", nvme_bdev->rr_min_io);
    3802             :                 }
    3803             :         }
    3804           0 :         pthread_mutex_unlock(&nvme_bdev->mutex);
    3805             : 
    3806           0 :         return 0;
    3807             : }
    3808             : 
    3809             : static void
    3810           0 : bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
    3811             : {
    3812             :         /* No config per bdev needed */
    3813           0 : }
    3814             : 
    3815             : static uint64_t
    3816           0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
    3817             : {
    3818           0 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    3819             :         struct nvme_io_path *io_path;
    3820             :         struct nvme_poll_group *group;
    3821           0 :         uint64_t spin_time = 0;
    3822             : 
    3823           0 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    3824           0 :                 group = io_path->qpair->group;
    3825             : 
    3826           0 :                 if (!group || !group->collect_spin_stat) {
    3827           0 :                         continue;
    3828             :                 }
    3829             : 
    3830           0 :                 if (group->end_ticks != 0) {
    3831           0 :                         group->spin_ticks += (group->end_ticks - group->start_ticks);
    3832           0 :                         group->end_ticks = 0;
    3833             :                 }
    3834             : 
    3835           0 :                 spin_time += group->spin_ticks;
    3836           0 :                 group->start_ticks = 0;
    3837           0 :                 group->spin_ticks = 0;
    3838             :         }
    3839             : 
    3840           0 :         return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
    3841             : }
    3842             : 
    3843             : static void
    3844           0 : bdev_nvme_reset_device_stat(void *ctx)
    3845             : {
    3846           0 :         struct nvme_bdev *nbdev = ctx;
    3847             : 
    3848           0 :         if (nbdev->err_stat != NULL) {
    3849           0 :                 memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
    3850             :         }
    3851           0 : }
    3852             : 
    3853             : /* JSON string should be lowercases and underscore delimited string. */
    3854             : static void
    3855           0 : bdev_nvme_format_nvme_status(char *dst, const char *src)
    3856             : {
    3857           0 :         char tmp[256];
    3858             : 
    3859           0 :         spdk_strcpy_replace(dst, 256, src, " - ", "_");
    3860           0 :         spdk_strcpy_replace(tmp, 256, dst, "-", "_");
    3861           0 :         spdk_strcpy_replace(dst, 256, tmp, " ", "_");
    3862           0 :         spdk_strlwr(dst);
    3863           0 : }
    3864             : 
    3865             : static void
    3866           0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
    3867             : {
    3868           0 :         struct nvme_bdev *nbdev = ctx;
    3869           0 :         struct spdk_nvme_status status = {};
    3870             :         uint16_t sct, sc;
    3871           0 :         char status_json[256];
    3872             :         const char *status_str;
    3873             : 
    3874           0 :         if (nbdev->err_stat == NULL) {
    3875           0 :                 return;
    3876             :         }
    3877             : 
    3878           0 :         spdk_json_write_named_object_begin(w, "nvme_error");
    3879             : 
    3880           0 :         spdk_json_write_named_object_begin(w, "status_type");
    3881           0 :         for (sct = 0; sct < 8; sct++) {
    3882           0 :                 if (nbdev->err_stat->status_type[sct] == 0) {
    3883           0 :                         continue;
    3884             :                 }
    3885           0 :                 status.sct = sct;
    3886             : 
    3887           0 :                 status_str = spdk_nvme_cpl_get_status_type_string(&status);
    3888           0 :                 assert(status_str != NULL);
    3889           0 :                 bdev_nvme_format_nvme_status(status_json, status_str);
    3890             : 
    3891           0 :                 spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
    3892             :         }
    3893           0 :         spdk_json_write_object_end(w);
    3894             : 
    3895           0 :         spdk_json_write_named_object_begin(w, "status_code");
    3896           0 :         for (sct = 0; sct < 4; sct++) {
    3897           0 :                 status.sct = sct;
    3898           0 :                 for (sc = 0; sc < 256; sc++) {
    3899           0 :                         if (nbdev->err_stat->status[sct][sc] == 0) {
    3900           0 :                                 continue;
    3901             :                         }
    3902           0 :                         status.sc = sc;
    3903             : 
    3904           0 :                         status_str = spdk_nvme_cpl_get_status_string(&status);
    3905           0 :                         assert(status_str != NULL);
    3906           0 :                         bdev_nvme_format_nvme_status(status_json, status_str);
    3907             : 
    3908           0 :                         spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
    3909             :                 }
    3910             :         }
    3911           0 :         spdk_json_write_object_end(w);
    3912             : 
    3913           0 :         spdk_json_write_object_end(w);
    3914             : }
    3915             : 
    3916             : static bool
    3917           0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
    3918             : {
    3919           0 :         struct nvme_bdev *nbdev = ctx;
    3920             :         struct spdk_nvme_ctrlr *ctrlr;
    3921             : 
    3922           0 :         if (!g_opts.allow_accel_sequence) {
    3923           0 :                 return false;
    3924             :         }
    3925             : 
    3926           0 :         switch (type) {
    3927           0 :         case SPDK_BDEV_IO_TYPE_WRITE:
    3928             :         case SPDK_BDEV_IO_TYPE_READ:
    3929           0 :                 break;
    3930           0 :         default:
    3931           0 :                 return false;
    3932             :         }
    3933             : 
    3934           0 :         ctrlr = bdev_nvme_get_ctrlr(&nbdev->disk);
    3935           0 :         assert(ctrlr != NULL);
    3936             : 
    3937           0 :         return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
    3938             : }
    3939             : 
    3940             : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
    3941             :         .destruct                       = bdev_nvme_destruct,
    3942             :         .submit_request                 = bdev_nvme_submit_request,
    3943             :         .io_type_supported              = bdev_nvme_io_type_supported,
    3944             :         .get_io_channel                 = bdev_nvme_get_io_channel,
    3945             :         .dump_info_json                 = bdev_nvme_dump_info_json,
    3946             :         .write_config_json              = bdev_nvme_write_config_json,
    3947             :         .get_spin_time                  = bdev_nvme_get_spin_time,
    3948             :         .get_module_ctx                 = bdev_nvme_get_module_ctx,
    3949             :         .get_memory_domains             = bdev_nvme_get_memory_domains,
    3950             :         .accel_sequence_supported       = bdev_nvme_accel_sequence_supported,
    3951             :         .reset_device_stat              = bdev_nvme_reset_device_stat,
    3952             :         .dump_device_stat_json          = bdev_nvme_dump_device_stat_json,
    3953             : };
    3954             : 
    3955             : typedef int (*bdev_nvme_parse_ana_log_page_cb)(
    3956             :         const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);
    3957             : 
    3958             : static int
    3959          41 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    3960             :                              bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
    3961             : {
    3962             :         struct spdk_nvme_ana_group_descriptor *copied_desc;
    3963             :         uint8_t *orig_desc;
    3964             :         uint32_t i, desc_size, copy_len;
    3965          41 :         int rc = 0;
    3966             : 
    3967          41 :         if (nvme_ctrlr->ana_log_page == NULL) {
    3968           0 :                 return -EINVAL;
    3969             :         }
    3970             : 
    3971          41 :         copied_desc = nvme_ctrlr->copied_ana_desc;
    3972             : 
    3973          41 :         orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
    3974          41 :         copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
    3975             : 
    3976          71 :         for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
    3977          66 :                 memcpy(copied_desc, orig_desc, copy_len);
    3978             : 
    3979          66 :                 rc = cb_fn(copied_desc, cb_arg);
    3980          66 :                 if (rc != 0) {
    3981          36 :                         break;
    3982             :                 }
    3983             : 
    3984          30 :                 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
    3985          30 :                             copied_desc->num_of_nsid * sizeof(uint32_t);
    3986          30 :                 orig_desc += desc_size;
    3987          30 :                 copy_len -= desc_size;
    3988             :         }
    3989             : 
    3990          41 :         return rc;
    3991             : }
    3992             : 
    3993             : static int
    3994           5 : nvme_ns_ana_transition_timedout(void *ctx)
    3995             : {
    3996           5 :         struct nvme_ns *nvme_ns = ctx;
    3997             : 
    3998           5 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    3999           5 :         nvme_ns->ana_transition_timedout = true;
    4000             : 
    4001           5 :         return SPDK_POLLER_BUSY;
    4002             : }
    4003             : 
    4004             : static void
    4005          45 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
    4006             :                        const struct spdk_nvme_ana_group_descriptor *desc)
    4007             : {
    4008             :         const struct spdk_nvme_ctrlr_data *cdata;
    4009             : 
    4010          45 :         nvme_ns->ana_group_id = desc->ana_group_id;
    4011          45 :         nvme_ns->ana_state = desc->ana_state;
    4012          45 :         nvme_ns->ana_state_updating = false;
    4013             : 
    4014          45 :         switch (nvme_ns->ana_state) {
    4015          38 :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    4016             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    4017          38 :                 nvme_ns->ana_transition_timedout = false;
    4018          38 :                 spdk_poller_unregister(&nvme_ns->anatt_timer);
    4019          38 :                 break;
    4020             : 
    4021           6 :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    4022             :         case SPDK_NVME_ANA_CHANGE_STATE:
    4023           6 :                 if (nvme_ns->anatt_timer != NULL) {
    4024           1 :                         break;
    4025             :                 }
    4026             : 
    4027           5 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    4028           5 :                 nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
    4029             :                                        nvme_ns,
    4030             :                                        cdata->anatt * SPDK_SEC_TO_USEC);
    4031           5 :                 break;
    4032           1 :         default:
    4033           1 :                 break;
    4034             :         }
    4035          45 : }
    4036             : 
    4037             : static int
    4038          59 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
    4039             : {
    4040          59 :         struct nvme_ns *nvme_ns = cb_arg;
    4041             :         uint32_t i;
    4042             : 
    4043          59 :         assert(nvme_ns->ns != NULL);
    4044             : 
    4045          81 :         for (i = 0; i < desc->num_of_nsid; i++) {
    4046          58 :                 if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
    4047          22 :                         continue;
    4048             :                 }
    4049             : 
    4050          36 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    4051          36 :                 return 1;
    4052             :         }
    4053             : 
    4054          23 :         return 0;
    4055             : }
    4056             : 
    4057             : static int
    4058           5 : nvme_generate_uuid(const char *sn, uint32_t nsid, struct spdk_uuid *uuid)
    4059             : {
    4060           5 :         int rc = 0;
    4061           5 :         struct spdk_uuid new_uuid, namespace_uuid;
    4062           5 :         char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
    4063             :         /* This namespace UUID was generated using uuid_generate() method. */
    4064           5 :         const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
    4065             :         int size;
    4066             : 
    4067           5 :         assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
    4068             : 
    4069           5 :         spdk_uuid_set_null(&new_uuid);
    4070           5 :         spdk_uuid_set_null(&namespace_uuid);
    4071             : 
    4072           5 :         size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
    4073           5 :         if (size <= 0 || (unsigned long)size >= sizeof(merged_str)) {
    4074           0 :                 return -EINVAL;
    4075             :         }
    4076             : 
    4077           5 :         spdk_uuid_parse(&namespace_uuid, namespace_str);
    4078             : 
    4079           5 :         rc = spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
    4080           5 :         if (rc == 0) {
    4081           5 :                 memcpy(uuid, &new_uuid, sizeof(struct spdk_uuid));
    4082             :         }
    4083             : 
    4084           5 :         return rc;
    4085             : }
    4086             : 
    4087             : static int
    4088          37 : nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
    4089             :                  struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
    4090             :                  uint32_t prchk_flags, void *ctx)
    4091             : {
    4092             :         const struct spdk_uuid          *uuid;
    4093             :         const uint8_t *nguid;
    4094             :         const struct spdk_nvme_ctrlr_data *cdata;
    4095             :         const struct spdk_nvme_ns_data  *nsdata;
    4096             :         const struct spdk_nvme_ctrlr_opts *opts;
    4097             :         enum spdk_nvme_csi              csi;
    4098             :         uint32_t atomic_bs, phys_bs, bs;
    4099          37 :         char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
    4100             :         int rc;
    4101             : 
    4102          37 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    4103          37 :         csi = spdk_nvme_ns_get_csi(ns);
    4104          37 :         opts = spdk_nvme_ctrlr_get_opts(ctrlr);
    4105             : 
    4106          37 :         switch (csi) {
    4107          37 :         case SPDK_NVME_CSI_NVM:
    4108          37 :                 disk->product_name = "NVMe disk";
    4109          37 :                 break;
    4110           0 :         case SPDK_NVME_CSI_ZNS:
    4111           0 :                 disk->product_name = "NVMe ZNS disk";
    4112           0 :                 disk->zoned = true;
    4113           0 :                 disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    4114           0 :                 disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
    4115           0 :                                              spdk_nvme_ns_get_extended_sector_size(ns);
    4116           0 :                 disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
    4117           0 :                 disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
    4118           0 :                 break;
    4119           0 :         default:
    4120           0 :                 SPDK_ERRLOG("unsupported CSI: %u\n", csi);
    4121           0 :                 return -ENOTSUP;
    4122             :         }
    4123             : 
    4124          37 :         nguid = spdk_nvme_ns_get_nguid(ns);
    4125          37 :         if (!nguid) {
    4126          37 :                 uuid = spdk_nvme_ns_get_uuid(ns);
    4127          37 :                 if (uuid) {
    4128          12 :                         disk->uuid = *uuid;
    4129          25 :                 } else if (g_opts.generate_uuids) {
    4130           0 :                         spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
    4131           0 :                         rc = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns), &disk->uuid);
    4132           0 :                         if (rc < 0) {
    4133           0 :                                 SPDK_ERRLOG("UUID generation failed (%s)\n", spdk_strerror(-rc));
    4134           0 :                                 return rc;
    4135             :                         }
    4136             :                 }
    4137             :         } else {
    4138           0 :                 memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
    4139             :         }
    4140             : 
    4141          37 :         disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
    4142          37 :         if (!disk->name) {
    4143           0 :                 return -ENOMEM;
    4144             :         }
    4145             : 
    4146          37 :         disk->write_cache = 0;
    4147          37 :         if (cdata->vwc.present) {
    4148             :                 /* Enable if the Volatile Write Cache exists */
    4149           0 :                 disk->write_cache = 1;
    4150             :         }
    4151          37 :         if (cdata->oncs.write_zeroes) {
    4152           0 :                 disk->max_write_zeroes = UINT16_MAX + 1;
    4153             :         }
    4154          37 :         disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
    4155          37 :         disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
    4156          37 :         disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
    4157          37 :         disk->ctratt.raw = cdata->ctratt.raw;
    4158             :         /* NVMe driver will split one request into multiple requests
    4159             :          * based on MDTS and stripe boundary, the bdev layer will use
    4160             :          * max_segment_size and max_num_segments to split one big IO
    4161             :          * into multiple requests, then small request can't run out
    4162             :          * of NVMe internal requests data structure.
    4163             :          */
    4164          37 :         if (opts && opts->io_queue_requests) {
    4165           0 :                 disk->max_num_segments = opts->io_queue_requests / 2;
    4166             :         }
    4167          37 :         if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
    4168             :                 /* The nvme driver will try to split I/O that have too many
    4169             :                  * SGEs, but it doesn't work if that last SGE doesn't end on
    4170             :                  * an aggregate total that is block aligned. The bdev layer has
    4171             :                  * a more robust splitting framework, so use that instead for
    4172             :                  * this case. (See issue #3269.)
    4173             :                  */
    4174           0 :                 uint16_t max_sges = spdk_nvme_ctrlr_get_max_sges(ctrlr);
    4175             : 
    4176           0 :                 if (disk->max_num_segments == 0) {
    4177           0 :                         disk->max_num_segments = max_sges;
    4178             :                 } else {
    4179           0 :                         disk->max_num_segments = spdk_min(disk->max_num_segments, max_sges);
    4180             :                 }
    4181             :         }
    4182          37 :         disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
    4183             : 
    4184          37 :         nsdata = spdk_nvme_ns_get_data(ns);
    4185          37 :         bs = spdk_nvme_ns_get_sector_size(ns);
    4186          37 :         atomic_bs = bs;
    4187          37 :         phys_bs = bs;
    4188          37 :         if (nsdata->nabo == 0) {
    4189          37 :                 if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
    4190           0 :                         atomic_bs = bs * (1 + nsdata->nawupf);
    4191             :                 } else {
    4192          37 :                         atomic_bs = bs * (1 + cdata->awupf);
    4193             :                 }
    4194             :         }
    4195          37 :         if (nsdata->nsfeat.optperf) {
    4196           0 :                 phys_bs = bs * (1 + nsdata->npwg);
    4197             :         }
    4198          37 :         disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
    4199             : 
    4200          37 :         disk->md_len = spdk_nvme_ns_get_md_size(ns);
    4201          37 :         if (disk->md_len != 0) {
    4202           0 :                 disk->md_interleave = nsdata->flbas.extended;
    4203           0 :                 disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
    4204           0 :                 if (disk->dif_type != SPDK_DIF_DISABLE) {
    4205           0 :                         disk->dif_is_head_of_md = nsdata->dps.md_start;
    4206           0 :                         disk->dif_check_flags = prchk_flags;
    4207           0 :                         disk->dif_pi_format = (enum spdk_dif_pi_format)spdk_nvme_ns_get_pi_format(ns);
    4208             :                 }
    4209             :         }
    4210             : 
    4211          37 :         if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
    4212             :               SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
    4213          37 :                 disk->acwu = 0;
    4214           0 :         } else if (nsdata->nsfeat.ns_atomic_write_unit) {
    4215           0 :                 disk->acwu = nsdata->nacwu + 1; /* 0-based */
    4216             :         } else {
    4217           0 :                 disk->acwu = cdata->acwu + 1; /* 0-based */
    4218             :         }
    4219             : 
    4220          37 :         if (cdata->oncs.copy) {
    4221             :                 /* For now bdev interface allows only single segment copy */
    4222           0 :                 disk->max_copy = nsdata->mssrl;
    4223             :         }
    4224             : 
    4225          37 :         disk->ctxt = ctx;
    4226          37 :         disk->fn_table = &nvmelib_fn_table;
    4227          37 :         disk->module = &nvme_if;
    4228             : 
    4229          37 :         return 0;
    4230             : }
    4231             : 
    4232             : static struct nvme_bdev *
    4233          37 : nvme_bdev_alloc(void)
    4234             : {
    4235             :         struct nvme_bdev *bdev;
    4236             :         int rc;
    4237             : 
    4238          37 :         bdev = calloc(1, sizeof(*bdev));
    4239          37 :         if (!bdev) {
    4240           0 :                 SPDK_ERRLOG("bdev calloc() failed\n");
    4241           0 :                 return NULL;
    4242             :         }
    4243             : 
    4244          37 :         if (g_opts.nvme_error_stat) {
    4245           0 :                 bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
    4246           0 :                 if (!bdev->err_stat) {
    4247           0 :                         SPDK_ERRLOG("err_stat calloc() failed\n");
    4248           0 :                         free(bdev);
    4249           0 :                         return NULL;
    4250             :                 }
    4251             :         }
    4252             : 
    4253          37 :         rc = pthread_mutex_init(&bdev->mutex, NULL);
    4254          37 :         if (rc != 0) {
    4255           0 :                 free(bdev->err_stat);
    4256           0 :                 free(bdev);
    4257           0 :                 return NULL;
    4258             :         }
    4259             : 
    4260          37 :         bdev->ref = 1;
    4261          37 :         bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
    4262          37 :         bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
    4263          37 :         bdev->rr_min_io = UINT32_MAX;
    4264          37 :         TAILQ_INIT(&bdev->nvme_ns_list);
    4265             : 
    4266          37 :         return bdev;
    4267             : }
    4268             : 
    4269             : static int
    4270          37 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4271             : {
    4272             :         struct nvme_bdev *bdev;
    4273          37 :         struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
    4274             :         int rc;
    4275             : 
    4276          37 :         bdev = nvme_bdev_alloc();
    4277          37 :         if (bdev == NULL) {
    4278           0 :                 SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
    4279           0 :                 return -ENOMEM;
    4280             :         }
    4281             : 
    4282          37 :         bdev->opal = nvme_ctrlr->opal_dev != NULL;
    4283             : 
    4284          37 :         rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
    4285             :                               nvme_ns->ns, nvme_ctrlr->opts.prchk_flags, bdev);
    4286          37 :         if (rc != 0) {
    4287           0 :                 SPDK_ERRLOG("Failed to create NVMe disk\n");
    4288           0 :                 nvme_bdev_free(bdev);
    4289           0 :                 return rc;
    4290             :         }
    4291             : 
    4292          37 :         spdk_io_device_register(bdev,
    4293             :                                 bdev_nvme_create_bdev_channel_cb,
    4294             :                                 bdev_nvme_destroy_bdev_channel_cb,
    4295             :                                 sizeof(struct nvme_bdev_channel),
    4296          37 :                                 bdev->disk.name);
    4297             : 
    4298          37 :         nvme_ns->bdev = bdev;
    4299          37 :         bdev->nsid = nvme_ns->id;
    4300          37 :         TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
    4301             : 
    4302          37 :         bdev->nbdev_ctrlr = nbdev_ctrlr;
    4303          37 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);
    4304             : 
    4305          37 :         rc = spdk_bdev_register(&bdev->disk);
    4306          37 :         if (rc != 0) {
    4307           1 :                 SPDK_ERRLOG("spdk_bdev_register() failed\n");
    4308           1 :                 spdk_io_device_unregister(bdev, NULL);
    4309           1 :                 nvme_ns->bdev = NULL;
    4310           1 :                 TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
    4311           1 :                 nvme_bdev_free(bdev);
    4312           1 :                 return rc;
    4313             :         }
    4314             : 
    4315          36 :         return 0;
    4316             : }
    4317             : 
    4318             : static bool
    4319          23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
    4320             : {
    4321             :         const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
    4322             :         const struct spdk_uuid *uuid1, *uuid2;
    4323             : 
    4324          23 :         nsdata1 = spdk_nvme_ns_get_data(ns1);
    4325          23 :         nsdata2 = spdk_nvme_ns_get_data(ns2);
    4326          23 :         uuid1 = spdk_nvme_ns_get_uuid(ns1);
    4327          23 :         uuid2 = spdk_nvme_ns_get_uuid(ns2);
    4328             : 
    4329          45 :         return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
    4330          22 :                nsdata1->eui64 == nsdata2->eui64 &&
    4331          21 :                ((uuid1 == NULL && uuid2 == NULL) ||
    4332          59 :                 (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
    4333          18 :                spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
    4334             : }
    4335             : 
    4336             : static bool
    4337           0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    4338             :                  struct spdk_nvme_ctrlr_opts *opts)
    4339             : {
    4340             :         struct nvme_probe_skip_entry *entry;
    4341             : 
    4342           0 :         TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
    4343           0 :                 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    4344           0 :                         return false;
    4345             :                 }
    4346             :         }
    4347             : 
    4348           0 :         opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
    4349           0 :         opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
    4350           0 :         opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
    4351           0 :         opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
    4352           0 :         opts->disable_read_ana_log_page = true;
    4353             : 
    4354           0 :         SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
    4355             : 
    4356           0 :         return true;
    4357             : }
    4358             : 
    4359             : static void
    4360           0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
    4361             : {
    4362           0 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    4363             : 
    4364           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    4365           0 :                 SPDK_WARNLOG("Abort failed. Resetting controller. sc is %u, sct is %u.\n", cpl->status.sc,
    4366             :                              cpl->status.sct);
    4367           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4368           0 :         } else if (cpl->cdw0 & 0x1) {
    4369           0 :                 SPDK_WARNLOG("Specified command could not be aborted.\n");
    4370           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4371             :         }
    4372           0 : }
    4373             : 
    4374             : static void
    4375           0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
    4376             :            struct spdk_nvme_qpair *qpair, uint16_t cid)
    4377             : {
    4378           0 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    4379             :         union spdk_nvme_csts_register csts;
    4380             :         int rc;
    4381             : 
    4382           0 :         assert(nvme_ctrlr->ctrlr == ctrlr);
    4383             : 
    4384           0 :         SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
    4385             : 
    4386             :         /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
    4387             :          * queue.  (Note: qpair == NULL when there's an admin cmd timeout.)  Otherwise we
    4388             :          * would submit another fabrics cmd on the admin queue to read CSTS and check for its
    4389             :          * completion recursively.
    4390             :          */
    4391           0 :         if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
    4392           0 :                 csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
    4393           0 :                 if (csts.bits.cfs) {
    4394           0 :                         SPDK_ERRLOG("Controller Fatal Status, reset required\n");
    4395           0 :                         bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4396           0 :                         return;
    4397             :                 }
    4398             :         }
    4399             : 
    4400           0 :         switch (g_opts.action_on_timeout) {
    4401           0 :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
    4402           0 :                 if (qpair) {
    4403             :                         /* Don't send abort to ctrlr when ctrlr is not available. */
    4404           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4405           0 :                         if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    4406           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4407           0 :                                 SPDK_NOTICELOG("Quit abort. Ctrlr is not available.\n");
    4408           0 :                                 return;
    4409             :                         }
    4410           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4411             : 
    4412           0 :                         rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
    4413             :                                                        nvme_abort_cpl, nvme_ctrlr);
    4414           0 :                         if (rc == 0) {
    4415           0 :                                 return;
    4416             :                         }
    4417             : 
    4418           0 :                         SPDK_ERRLOG("Unable to send abort. Resetting, rc is %d.\n", rc);
    4419             :                 }
    4420             : 
    4421             :         /* FALLTHROUGH */
    4422             :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
    4423           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4424           0 :                 break;
    4425           0 :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
    4426           0 :                 SPDK_DEBUGLOG(bdev_nvme, "No action for nvme controller timeout.\n");
    4427           0 :                 break;
    4428           0 :         default:
    4429           0 :                 SPDK_ERRLOG("An invalid timeout action value is found.\n");
    4430           0 :                 break;
    4431             :         }
    4432             : }
    4433             : 
    4434             : static struct nvme_ns *
    4435          50 : nvme_ns_alloc(void)
    4436             : {
    4437             :         struct nvme_ns *nvme_ns;
    4438             : 
    4439          50 :         nvme_ns = calloc(1, sizeof(struct nvme_ns));
    4440          50 :         if (nvme_ns == NULL) {
    4441           0 :                 return NULL;
    4442             :         }
    4443             : 
    4444          50 :         if (g_opts.io_path_stat) {
    4445           0 :                 nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
    4446           0 :                 if (nvme_ns->stat == NULL) {
    4447           0 :                         free(nvme_ns);
    4448           0 :                         return NULL;
    4449             :                 }
    4450           0 :                 spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
    4451             :         }
    4452             : 
    4453          50 :         return nvme_ns;
    4454             : }
    4455             : 
    4456             : static void
    4457          50 : nvme_ns_free(struct nvme_ns *nvme_ns)
    4458             : {
    4459          50 :         free(nvme_ns->stat);
    4460          50 :         free(nvme_ns);
    4461          50 : }
    4462             : 
    4463             : static void
    4464          50 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
    4465             : {
    4466          50 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4467          50 :         struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
    4468             : 
    4469          50 :         if (rc == 0) {
    4470          48 :                 nvme_ns->probe_ctx = NULL;
    4471          48 :                 pthread_mutex_lock(&nvme_ctrlr->mutex);
    4472          48 :                 nvme_ctrlr->ref++;
    4473          48 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4474             :         } else {
    4475           2 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4476           2 :                 nvme_ns_free(nvme_ns);
    4477             :         }
    4478             : 
    4479          50 :         if (ctx) {
    4480          49 :                 ctx->populates_in_progress--;
    4481          49 :                 if (ctx->populates_in_progress == 0) {
    4482          12 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    4483             :                 }
    4484             :         }
    4485          50 : }
    4486             : 
    4487             : static void
    4488           2 : bdev_nvme_add_io_path(struct spdk_io_channel_iter *i)
    4489             : {
    4490           2 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    4491           2 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    4492           2 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4493             :         int rc;
    4494             : 
    4495           2 :         rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
    4496           2 :         if (rc != 0) {
    4497           0 :                 SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
    4498             :         }
    4499             : 
    4500           2 :         spdk_for_each_channel_continue(i, rc);
    4501           2 : }
    4502             : 
    4503             : static void
    4504           2 : bdev_nvme_delete_io_path(struct spdk_io_channel_iter *i)
    4505             : {
    4506           2 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    4507           2 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    4508           2 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4509             :         struct nvme_io_path *io_path;
    4510             : 
    4511           2 :         io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
    4512           2 :         if (io_path != NULL) {
    4513           2 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
    4514             :         }
    4515             : 
    4516           2 :         spdk_for_each_channel_continue(i, 0);
    4517           2 : }
    4518             : 
    4519             : static void
    4520           0 : bdev_nvme_add_io_path_failed(struct spdk_io_channel_iter *i, int status)
    4521             : {
    4522           0 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4523             : 
    4524           0 :         nvme_ctrlr_populate_namespace_done(nvme_ns, -1);
    4525           0 : }
    4526             : 
    4527             : static void
    4528          12 : bdev_nvme_add_io_path_done(struct spdk_io_channel_iter *i, int status)
    4529             : {
    4530          12 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4531          12 :         struct nvme_bdev *bdev = spdk_io_channel_iter_get_io_device(i);
    4532             : 
    4533          12 :         if (status == 0) {
    4534          12 :                 nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
    4535             :         } else {
    4536             :                 /* Delete the added io_paths and fail populating the namespace. */
    4537           0 :                 spdk_for_each_channel(bdev,
    4538             :                                       bdev_nvme_delete_io_path,
    4539             :                                       nvme_ns,
    4540             :                                       bdev_nvme_add_io_path_failed);
    4541             :         }
    4542          12 : }
    4543             : 
    4544             : static int
    4545          13 : nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
    4546             : {
    4547             :         struct nvme_ns *tmp_ns;
    4548             :         const struct spdk_nvme_ns_data *nsdata;
    4549             : 
    4550          13 :         nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
    4551          13 :         if (!nsdata->nmic.can_share) {
    4552           0 :                 SPDK_ERRLOG("Namespace cannot be shared.\n");
    4553           0 :                 return -EINVAL;
    4554             :         }
    4555             : 
    4556          13 :         pthread_mutex_lock(&bdev->mutex);
    4557             : 
    4558          13 :         tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
    4559          13 :         assert(tmp_ns != NULL);
    4560             : 
    4561          13 :         if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
    4562           1 :                 pthread_mutex_unlock(&bdev->mutex);
    4563           1 :                 SPDK_ERRLOG("Namespaces are not identical.\n");
    4564           1 :                 return -EINVAL;
    4565             :         }
    4566             : 
    4567          12 :         bdev->ref++;
    4568          12 :         TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
    4569          12 :         nvme_ns->bdev = bdev;
    4570             : 
    4571          12 :         pthread_mutex_unlock(&bdev->mutex);
    4572             : 
    4573             :         /* Add nvme_io_path to nvme_bdev_channels dynamically. */
    4574          12 :         spdk_for_each_channel(bdev,
    4575             :                               bdev_nvme_add_io_path,
    4576             :                               nvme_ns,
    4577             :                               bdev_nvme_add_io_path_done);
    4578             : 
    4579          12 :         return 0;
    4580             : }
    4581             : 
    4582             : static void
    4583          50 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4584             : {
    4585             :         struct spdk_nvme_ns     *ns;
    4586             :         struct nvme_bdev        *bdev;
    4587          50 :         int                     rc = 0;
    4588             : 
    4589          50 :         ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
    4590          50 :         if (!ns) {
    4591           0 :                 SPDK_DEBUGLOG(bdev_nvme, "Invalid NS %d\n", nvme_ns->id);
    4592           0 :                 rc = -EINVAL;
    4593           0 :                 goto done;
    4594             :         }
    4595             : 
    4596          50 :         nvme_ns->ns = ns;
    4597          50 :         nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    4598             : 
    4599          50 :         if (nvme_ctrlr->ana_log_page != NULL) {
    4600          37 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
    4601             :         }
    4602             : 
    4603          50 :         bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
    4604          50 :         if (bdev == NULL) {
    4605          37 :                 rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
    4606             :         } else {
    4607          13 :                 rc = nvme_bdev_add_ns(bdev, nvme_ns);
    4608          13 :                 if (rc == 0) {
    4609          12 :                         return;
    4610             :                 }
    4611             :         }
    4612           1 : done:
    4613          38 :         nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
    4614             : }
    4615             : 
    4616             : static void
    4617          48 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
    4618             : {
    4619          48 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4620             : 
    4621          48 :         assert(nvme_ctrlr != NULL);
    4622             : 
    4623          48 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4624             : 
    4625          48 :         RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4626             : 
    4627          48 :         if (nvme_ns->bdev != NULL) {
    4628           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4629           0 :                 return;
    4630             :         }
    4631             : 
    4632          48 :         nvme_ns_free(nvme_ns);
    4633          48 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4634             : 
    4635          48 :         nvme_ctrlr_release(nvme_ctrlr);
    4636             : }
    4637             : 
    4638             : static void
    4639          11 : bdev_nvme_delete_io_path_done(struct spdk_io_channel_iter *i, int status)
    4640             : {
    4641          11 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4642             : 
    4643          11 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    4644          11 : }
    4645             : 
    4646             : static void
    4647          48 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4648             : {
    4649             :         struct nvme_bdev *bdev;
    4650             : 
    4651          48 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    4652             : 
    4653          48 :         bdev = nvme_ns->bdev;
    4654          48 :         if (bdev != NULL) {
    4655          44 :                 pthread_mutex_lock(&bdev->mutex);
    4656             : 
    4657          44 :                 assert(bdev->ref > 0);
    4658          44 :                 bdev->ref--;
    4659          44 :                 if (bdev->ref == 0) {
    4660          33 :                         pthread_mutex_unlock(&bdev->mutex);
    4661             : 
    4662          33 :                         spdk_bdev_unregister(&bdev->disk, NULL, NULL);
    4663             :                 } else {
    4664             :                         /* spdk_bdev_unregister() is not called until the last nvme_ns is
    4665             :                          * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
    4666             :                          * and clear nvme_ns->bdev here.
    4667             :                          */
    4668          11 :                         TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
    4669          11 :                         nvme_ns->bdev = NULL;
    4670             : 
    4671          11 :                         pthread_mutex_unlock(&bdev->mutex);
    4672             : 
    4673             :                         /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
    4674             :                          * we call depopulate_namespace_done() to avoid use-after-free.
    4675             :                          */
    4676          11 :                         spdk_for_each_channel(bdev,
    4677             :                                               bdev_nvme_delete_io_path,
    4678             :                                               nvme_ns,
    4679             :                                               bdev_nvme_delete_io_path_done);
    4680          11 :                         return;
    4681             :                 }
    4682             :         }
    4683             : 
    4684          37 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    4685             : }
    4686             : 
    4687             : static void
    4688          61 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
    4689             :                                struct nvme_async_probe_ctx *ctx)
    4690             : {
    4691          61 :         struct spdk_nvme_ctrlr  *ctrlr = nvme_ctrlr->ctrlr;
    4692             :         struct nvme_ns  *nvme_ns, *next;
    4693             :         struct spdk_nvme_ns     *ns;
    4694             :         struct nvme_bdev        *bdev;
    4695             :         uint32_t                nsid;
    4696             :         int                     rc;
    4697             :         uint64_t                num_sectors;
    4698             : 
    4699          61 :         if (ctx) {
    4700             :                 /* Initialize this count to 1 to handle the populate functions
    4701             :                  * calling nvme_ctrlr_populate_namespace_done() immediately.
    4702             :                  */
    4703          45 :                 ctx->populates_in_progress = 1;
    4704             :         }
    4705             : 
    4706             :         /* First loop over our existing namespaces and see if they have been
    4707             :          * removed. */
    4708          61 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    4709          65 :         while (nvme_ns != NULL) {
    4710           4 :                 next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    4711             : 
    4712           4 :                 if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    4713             :                         /* NS is still there or added again. Its attributes may have changed. */
    4714           3 :                         ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
    4715           3 :                         if (nvme_ns->ns != ns) {
    4716           1 :                                 assert(nvme_ns->ns == NULL);
    4717           1 :                                 nvme_ns->ns = ns;
    4718           1 :                                 SPDK_DEBUGLOG(bdev_nvme, "NSID %u was added\n", nvme_ns->id);
    4719             :                         }
    4720             : 
    4721           3 :                         num_sectors = spdk_nvme_ns_get_num_sectors(ns);
    4722           3 :                         bdev = nvme_ns->bdev;
    4723           3 :                         assert(bdev != NULL);
    4724           3 :                         if (bdev->disk.blockcnt != num_sectors) {
    4725           1 :                                 SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
    4726             :                                                nvme_ns->id,
    4727             :                                                bdev->disk.name,
    4728             :                                                bdev->disk.blockcnt,
    4729             :                                                num_sectors);
    4730           1 :                                 rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
    4731           1 :                                 if (rc != 0) {
    4732           0 :                                         SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
    4733             :                                                     bdev->disk.name, rc);
    4734             :                                 }
    4735             :                         }
    4736             :                 } else {
    4737             :                         /* Namespace was removed */
    4738           1 :                         nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    4739             :                 }
    4740             : 
    4741           4 :                 nvme_ns = next;
    4742             :         }
    4743             : 
    4744             :         /* Loop through all of the namespaces at the nvme level and see if any of them are new */
    4745          61 :         nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    4746         114 :         while (nsid != 0) {
    4747          53 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    4748             : 
    4749          53 :                 if (nvme_ns == NULL) {
    4750             :                         /* Found a new one */
    4751          50 :                         nvme_ns = nvme_ns_alloc();
    4752          50 :                         if (nvme_ns == NULL) {
    4753           0 :                                 SPDK_ERRLOG("Failed to allocate namespace\n");
    4754             :                                 /* This just fails to attach the namespace. It may work on a future attempt. */
    4755           0 :                                 continue;
    4756             :                         }
    4757             : 
    4758          50 :                         nvme_ns->id = nsid;
    4759          50 :                         nvme_ns->ctrlr = nvme_ctrlr;
    4760             : 
    4761          50 :                         nvme_ns->bdev = NULL;
    4762             : 
    4763          50 :                         if (ctx) {
    4764          49 :                                 ctx->populates_in_progress++;
    4765             :                         }
    4766          50 :                         nvme_ns->probe_ctx = ctx;
    4767             : 
    4768          50 :                         RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4769             : 
    4770          50 :                         nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
    4771             :                 }
    4772             : 
    4773          53 :                 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
    4774             :         }
    4775             : 
    4776          61 :         if (ctx) {
    4777             :                 /* Decrement this count now that the loop is over to account
    4778             :                  * for the one we started with.  If the count is then 0, we
    4779             :                  * know any populate_namespace functions completed immediately,
    4780             :                  * so we'll kick the callback here.
    4781             :                  */
    4782          45 :                 ctx->populates_in_progress--;
    4783          45 :                 if (ctx->populates_in_progress == 0) {
    4784          33 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    4785             :                 }
    4786             :         }
    4787             : 
    4788          61 : }
    4789             : 
    4790             : static void
    4791          60 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    4792             : {
    4793             :         struct nvme_ns *nvme_ns, *tmp;
    4794             : 
    4795         107 :         RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
    4796          47 :                 nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    4797             :         }
    4798          60 : }
    4799             : 
    4800             : static uint32_t
    4801          36 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
    4802             : {
    4803          36 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    4804             :         const struct spdk_nvme_ctrlr_data *cdata;
    4805          36 :         uint32_t nsid, ns_count = 0;
    4806             : 
    4807          36 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    4808             : 
    4809          36 :         for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    4810          80 :              nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
    4811          44 :                 ns_count++;
    4812             :         }
    4813             : 
    4814          36 :         return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    4815          36 :                sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
    4816             :                sizeof(uint32_t);
    4817             : }
    4818             : 
    4819             : static int
    4820           7 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
    4821             :                           void *cb_arg)
    4822             : {
    4823           7 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    4824             :         struct nvme_ns *nvme_ns;
    4825             :         uint32_t i, nsid;
    4826             : 
    4827          13 :         for (i = 0; i < desc->num_of_nsid; i++) {
    4828           6 :                 nsid = desc->nsid[i];
    4829           6 :                 if (nsid == 0) {
    4830           0 :                         continue;
    4831             :                 }
    4832             : 
    4833           6 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    4834             : 
    4835           6 :                 if (nvme_ns == NULL) {
    4836             :                         /* Target told us that an inactive namespace had an ANA change */
    4837           1 :                         continue;
    4838             :                 }
    4839             : 
    4840           5 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    4841             :         }
    4842             : 
    4843           7 :         return 0;
    4844             : }
    4845             : 
    4846             : static void
    4847           0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    4848             : {
    4849             :         struct nvme_ns *nvme_ns;
    4850             : 
    4851           0 :         spdk_free(nvme_ctrlr->ana_log_page);
    4852           0 :         nvme_ctrlr->ana_log_page = NULL;
    4853             : 
    4854           0 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    4855           0 :              nvme_ns != NULL;
    4856           0 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    4857           0 :                 nvme_ns->ana_state_updating = false;
    4858           0 :                 nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    4859             :         }
    4860           0 : }
    4861             : 
    4862             : static void
    4863           3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
    4864             : {
    4865           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    4866             : 
    4867           3 :         if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
    4868           3 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
    4869             :                                              nvme_ctrlr);
    4870             :         } else {
    4871           0 :                 bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
    4872             :         }
    4873             : 
    4874           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4875             : 
    4876           3 :         assert(nvme_ctrlr->ana_log_page_updating == true);
    4877           3 :         nvme_ctrlr->ana_log_page_updating = false;
    4878             : 
    4879           3 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    4880           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4881             : 
    4882           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    4883             :         } else {
    4884           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4885             : 
    4886           3 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    4887             :         }
    4888           3 : }
    4889             : 
    4890             : static int
    4891           6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    4892             : {
    4893             :         uint32_t ana_log_page_size;
    4894             :         int rc;
    4895             : 
    4896           6 :         if (nvme_ctrlr->ana_log_page == NULL) {
    4897           0 :                 return -EINVAL;
    4898             :         }
    4899             : 
    4900           6 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    4901             : 
    4902           6 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    4903           0 :                 SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    4904             :                             ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    4905           0 :                 return -EINVAL;
    4906             :         }
    4907             : 
    4908           6 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4909           6 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    4910             :             nvme_ctrlr->ana_log_page_updating) {
    4911           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4912           3 :                 return -EBUSY;
    4913             :         }
    4914             : 
    4915           3 :         nvme_ctrlr->ana_log_page_updating = true;
    4916           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4917             : 
    4918           3 :         rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
    4919             :                                               SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    4920             :                                               SPDK_NVME_GLOBAL_NS_TAG,
    4921           3 :                                               nvme_ctrlr->ana_log_page,
    4922             :                                               ana_log_page_size, 0,
    4923             :                                               nvme_ctrlr_read_ana_log_page_done,
    4924             :                                               nvme_ctrlr);
    4925           3 :         if (rc != 0) {
    4926           0 :                 nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
    4927             :         }
    4928             : 
    4929           3 :         return rc;
    4930             : }
    4931             : 
    4932             : static void
    4933           0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
    4934             : {
    4935           0 : }
    4936             : 
    4937             : struct bdev_nvme_set_preferred_path_ctx {
    4938             :         struct spdk_bdev_desc *desc;
    4939             :         struct nvme_ns *nvme_ns;
    4940             :         bdev_nvme_set_preferred_path_cb cb_fn;
    4941             :         void *cb_arg;
    4942             : };
    4943             : 
    4944             : static void
    4945           3 : bdev_nvme_set_preferred_path_done(struct spdk_io_channel_iter *i, int status)
    4946             : {
    4947           3 :         struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
    4948             : 
    4949           3 :         assert(ctx != NULL);
    4950           3 :         assert(ctx->desc != NULL);
    4951           3 :         assert(ctx->cb_fn != NULL);
    4952             : 
    4953           3 :         spdk_bdev_close(ctx->desc);
    4954             : 
    4955           3 :         ctx->cb_fn(ctx->cb_arg, status);
    4956             : 
    4957           3 :         free(ctx);
    4958           3 : }
    4959             : 
    4960             : static void
    4961           2 : _bdev_nvme_set_preferred_path(struct spdk_io_channel_iter *i)
    4962             : {
    4963           2 :         struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
    4964           2 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    4965           2 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    4966             :         struct nvme_io_path *io_path, *prev;
    4967             : 
    4968           2 :         prev = NULL;
    4969           3 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    4970           3 :                 if (io_path->nvme_ns == ctx->nvme_ns) {
    4971           2 :                         break;
    4972             :                 }
    4973           1 :                 prev = io_path;
    4974             :         }
    4975             : 
    4976           2 :         if (io_path != NULL) {
    4977           2 :                 if (prev != NULL) {
    4978           1 :                         STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
    4979           1 :                         STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
    4980             :                 }
    4981             : 
    4982             :                 /* We can set io_path to nbdev_ch->current_io_path directly here.
    4983             :                  * However, it needs to be conditional. To simplify the code,
    4984             :                  * just clear nbdev_ch->current_io_path and let find_io_path()
    4985             :                  * fill it.
    4986             :                  *
    4987             :                  * Automatic failback may be disabled. Hence even if the io_path is
    4988             :                  * already at the head, clear nbdev_ch->current_io_path.
    4989             :                  */
    4990           2 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    4991             :         }
    4992             : 
    4993           2 :         spdk_for_each_channel_continue(i, 0);
    4994           2 : }
    4995             : 
    4996             : static struct nvme_ns *
    4997           3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
    4998             : {
    4999             :         struct nvme_ns *nvme_ns, *prev;
    5000             :         const struct spdk_nvme_ctrlr_data *cdata;
    5001             : 
    5002           3 :         prev = NULL;
    5003           6 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    5004           6 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    5005             : 
    5006           6 :                 if (cdata->cntlid == cntlid) {
    5007           3 :                         break;
    5008             :                 }
    5009           3 :                 prev = nvme_ns;
    5010             :         }
    5011             : 
    5012           3 :         if (nvme_ns != NULL && prev != NULL) {
    5013           2 :                 TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
    5014           2 :                 TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
    5015             :         }
    5016             : 
    5017           3 :         return nvme_ns;
    5018             : }
    5019             : 
    5020             : /* This function supports only multipath mode. There is only a single I/O path
    5021             :  * for each NVMe-oF controller. Hence, just move the matched I/O path to the
    5022             :  * head of the I/O path list for each NVMe bdev channel.
    5023             :  *
    5024             :  * NVMe bdev channel may be acquired after completing this function. Move the
    5025             :  * matched namespace to the head of the namespace list for the NVMe bdev too.
    5026             :  */
    5027             : void
    5028           3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
    5029             :                              bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
    5030             : {
    5031             :         struct bdev_nvme_set_preferred_path_ctx *ctx;
    5032             :         struct spdk_bdev *bdev;
    5033             :         struct nvme_bdev *nbdev;
    5034           3 :         int rc = 0;
    5035             : 
    5036           3 :         assert(cb_fn != NULL);
    5037             : 
    5038           3 :         ctx = calloc(1, sizeof(*ctx));
    5039           3 :         if (ctx == NULL) {
    5040           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    5041           0 :                 rc = -ENOMEM;
    5042           0 :                 goto err_alloc;
    5043             :         }
    5044             : 
    5045           3 :         ctx->cb_fn = cb_fn;
    5046           3 :         ctx->cb_arg = cb_arg;
    5047             : 
    5048           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    5049           3 :         if (rc != 0) {
    5050           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    5051           0 :                 goto err_open;
    5052             :         }
    5053             : 
    5054           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    5055             : 
    5056           3 :         if (bdev->module != &nvme_if) {
    5057           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    5058           0 :                 rc = -ENODEV;
    5059           0 :                 goto err_bdev;
    5060             :         }
    5061             : 
    5062           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    5063             : 
    5064           3 :         pthread_mutex_lock(&nbdev->mutex);
    5065             : 
    5066           3 :         ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
    5067           3 :         if (ctx->nvme_ns == NULL) {
    5068           0 :                 pthread_mutex_unlock(&nbdev->mutex);
    5069             : 
    5070           0 :                 SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
    5071           0 :                 rc = -ENODEV;
    5072           0 :                 goto err_bdev;
    5073             :         }
    5074             : 
    5075           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5076             : 
    5077           3 :         spdk_for_each_channel(nbdev,
    5078             :                               _bdev_nvme_set_preferred_path,
    5079             :                               ctx,
    5080             :                               bdev_nvme_set_preferred_path_done);
    5081           3 :         return;
    5082             : 
    5083           0 : err_bdev:
    5084           0 :         spdk_bdev_close(ctx->desc);
    5085           0 : err_open:
    5086           0 :         free(ctx);
    5087           0 : err_alloc:
    5088           0 :         cb_fn(cb_arg, rc);
    5089             : }
    5090             : 
    5091             : struct bdev_nvme_set_multipath_policy_ctx { /* State carried through the channel iteration started by bdev_nvme_set_multipath_policy(). */
    5092             :         struct spdk_bdev_desc *desc; /* Descriptor opened with spdk_bdev_open_ext(); closed in the completion callback. */
    5093             :         bdev_nvme_set_multipath_policy_cb cb_fn; /* Caller's completion callback. */
    5094             :         void *cb_arg; /* Opaque argument passed back to cb_fn. */
    5095             : };
    5096             : 
    5097             : static void
    5098           3 : bdev_nvme_set_multipath_policy_done(struct spdk_io_channel_iter *i, int status)
    5099             : {
    5100           3 :         struct bdev_nvme_set_multipath_policy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
    5101             : 
    5102           3 :         assert(ctx != NULL);
    5103           3 :         assert(ctx->desc != NULL);
    5104           3 :         assert(ctx->cb_fn != NULL);
    5105             : 
    5106           3 :         spdk_bdev_close(ctx->desc);
    5107             : 
    5108           3 :         ctx->cb_fn(ctx->cb_arg, status);
    5109             : 
    5110           3 :         free(ctx);
    5111           3 : }
    5112             : 
    5113             : static void
    5114           1 : _bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i)
    5115             : {
    5116           1 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    5117           1 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    5118           1 :         struct nvme_bdev *nbdev = spdk_io_channel_get_io_device(_ch);
    5119             : 
    5120           1 :         nbdev_ch->mp_policy = nbdev->mp_policy;
    5121           1 :         nbdev_ch->mp_selector = nbdev->mp_selector;
    5122           1 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
    5123           1 :         bdev_nvme_clear_current_io_path(nbdev_ch);
    5124             : 
    5125           1 :         spdk_for_each_channel_continue(i, 0);
    5126           1 : }
    5127             : 
    5128             : void
    5129           3 : bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy policy,
    5130             :                                enum bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
    5131             :                                bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
    5132             : {
    5133             :         struct bdev_nvme_set_multipath_policy_ctx *ctx;
    5134             :         struct spdk_bdev *bdev;
    5135             :         struct nvme_bdev *nbdev;
    5136             :         int rc;
    5137             : 
    5138           3 :         assert(cb_fn != NULL);
    5139             : 
    5140           3 :         switch (policy) {
    5141           1 :         case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
    5142           1 :                 break;
    5143           2 :         case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
    5144             :                 switch (selector) {
    5145           1 :                 case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
    5146           1 :                         if (rr_min_io == UINT32_MAX) {
    5147           0 :                                 rr_min_io = 1;
    5148           1 :                         } else if (rr_min_io == 0) {
    5149           0 :                                 rc = -EINVAL;
    5150           0 :                                 goto exit;
    5151             :                         }
    5152           1 :                         break;
    5153           1 :                 case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
    5154           1 :                         break;
    5155           0 :                 default:
    5156           0 :                         rc = -EINVAL;
    5157           0 :                         goto exit;
    5158             :                 }
    5159           2 :                 break;
    5160           0 :         default:
    5161           0 :                 rc = -EINVAL;
    5162           0 :                 goto exit;
    5163             :         }
    5164             : 
    5165           3 :         ctx = calloc(1, sizeof(*ctx));
    5166           3 :         if (ctx == NULL) {
    5167           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    5168           0 :                 rc = -ENOMEM;
    5169           0 :                 goto exit;
    5170             :         }
    5171             : 
    5172           3 :         ctx->cb_fn = cb_fn;
    5173           3 :         ctx->cb_arg = cb_arg;
    5174             : 
    5175           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    5176           3 :         if (rc != 0) {
    5177           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    5178           0 :                 rc = -ENODEV;
    5179           0 :                 goto err_open;
    5180             :         }
    5181             : 
    5182           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    5183           3 :         if (bdev->module != &nvme_if) {
    5184           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    5185           0 :                 rc = -ENODEV;
    5186           0 :                 goto err_module;
    5187             :         }
    5188           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    5189             : 
    5190           3 :         pthread_mutex_lock(&nbdev->mutex);
    5191           3 :         nbdev->mp_policy = policy;
    5192           3 :         nbdev->mp_selector = selector;
    5193           3 :         nbdev->rr_min_io = rr_min_io;
    5194           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5195             : 
    5196           3 :         spdk_for_each_channel(nbdev,
    5197             :                               _bdev_nvme_set_multipath_policy,
    5198             :                               ctx,
    5199             :                               bdev_nvme_set_multipath_policy_done);
    5200           3 :         return;
    5201             : 
    5202           0 : err_module:
    5203           0 :         spdk_bdev_close(ctx->desc);
    5204           0 : err_open:
    5205           0 :         free(ctx);
    5206           0 : exit:
    5207           0 :         cb_fn(cb_arg, rc);
    5208             : }
    5209             : 
    5210             : static void
    5211           3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    5212             : {
    5213           3 :         struct nvme_ctrlr *nvme_ctrlr           = arg;
    5214             :         union spdk_nvme_async_event_completion  event;
    5215             : 
    5216           3 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5217           0 :                 SPDK_WARNLOG("AER request execute failed\n");
    5218           0 :                 return;
    5219             :         }
    5220             : 
    5221           3 :         event.raw = cpl->cdw0;
    5222           3 :         if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5223           3 :             (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
    5224           2 :                 nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
    5225           1 :         } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5226           1 :                    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
    5227           1 :                 nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
    5228             :         }
    5229             : }
    5230             : 
    5231             : static void
    5232          51 : free_nvme_async_probe_ctx(struct nvme_async_probe_ctx *ctx)
    5233             : {
    5234          51 :         spdk_keyring_put_key(ctx->drv_opts.tls_psk);
    5235          51 :         spdk_keyring_put_key(ctx->drv_opts.dhchap_key);
    5236          51 :         spdk_keyring_put_key(ctx->drv_opts.dhchap_ctrlr_key);
    5237          51 :         free(ctx);
    5238          51 : }
    5239             : 
    5240             : static void
    5241          51 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
    5242             : {
    5243          51 :         if (ctx->cb_fn) {
    5244          51 :                 ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
    5245             :         }
    5246             : 
    5247          51 :         ctx->namespaces_populated = true;
    5248          51 :         if (ctx->probe_done) {
    5249             :                 /* The probe was already completed, so we need to free the context
    5250             :                  * here.  This can happen for cases like OCSSD, where we need to
    5251             :                  * send additional commands to the SSD after attach.
    5252             :                  */
    5253          31 :                 free_nvme_async_probe_ctx(ctx);
    5254             :         }
    5255          51 : }
    5256             : 
    5257             : static int
    5258          18 : bdev_nvme_remove_poller(void *ctx)
    5259             : {
    5260          18 :         struct spdk_nvme_transport_id trid_pcie;
    5261             : 
    5262          18 :         if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    5263           1 :                 spdk_poller_unregister(&g_hotplug_poller);
    5264           1 :                 return SPDK_POLLER_IDLE;
    5265             :         }
    5266             : 
    5267          17 :         memset(&trid_pcie, 0, sizeof(trid_pcie));
    5268          17 :         spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
    5269             : 
    5270          17 :         if (spdk_nvme_scan_attached(&trid_pcie)) {
    5271           0 :                 SPDK_ERRLOG_RATELIMIT("spdk_nvme_scan_attached() failed\n");
    5272             :         }
    5273             : 
    5274          17 :         return SPDK_POLLER_BUSY;
    5275             : }
    5276             : 
    5277             : static void
    5278          59 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
    5279             :                        struct nvme_async_probe_ctx *ctx)
    5280             : {
    5281          59 :         spdk_io_device_register(nvme_ctrlr,
    5282             :                                 bdev_nvme_create_ctrlr_channel_cb,
    5283             :                                 bdev_nvme_destroy_ctrlr_channel_cb,
    5284             :                                 sizeof(struct nvme_ctrlr_channel),
    5285          59 :                                 nvme_ctrlr->nbdev_ctrlr->name);
    5286             : 
    5287          59 :         nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
    5288             : 
    5289          59 :         if (g_hotplug_poller == NULL) {
    5290           2 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
    5291             :                                                         NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
    5292             :         }
    5293          59 : }
    5294             : 
    5295             : static void
    5296          30 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
    5297             : {
    5298          30 :         struct nvme_ctrlr *nvme_ctrlr = _ctx;
    5299          30 :         struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
    5300             : 
    5301          30 :         nvme_ctrlr->probe_ctx = NULL;
    5302             : 
    5303          30 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5304           0 :                 nvme_ctrlr_delete(nvme_ctrlr);
    5305             : 
    5306           0 :                 if (ctx != NULL) {
    5307           0 :                         ctx->reported_bdevs = 0;
    5308           0 :                         populate_namespaces_cb(ctx, -1);
    5309             :                 }
    5310           0 :                 return;
    5311             :         }
    5312             : 
    5313          30 :         nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    5314             : }
    5315             : 
    5316             : static int
    5317          30 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    5318             :                              struct nvme_async_probe_ctx *ctx)
    5319             : {
    5320          30 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5321             :         const struct spdk_nvme_ctrlr_data *cdata;
    5322             :         uint32_t ana_log_page_size;
    5323             : 
    5324          30 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5325             : 
    5326             :         /* Set buffer size enough to include maximum number of allowed namespaces. */
    5327          30 :         ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    5328          30 :                             sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
    5329             :                             sizeof(uint32_t);
    5330             : 
    5331          30 :         nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
    5332             :                                                 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5333          30 :         if (nvme_ctrlr->ana_log_page == NULL) {
    5334           0 :                 SPDK_ERRLOG("could not allocate ANA log page buffer\n");
    5335           0 :                 return -ENXIO;
    5336             :         }
    5337             : 
    5338             :         /* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned.
    5339             :          * Hence copy each descriptor to a temporary area when parsing it.
    5340             :          *
    5341             :          * Allocate a buffer whose size is as large as ANA log page buffer because
    5342             :          * we do not know the size of a descriptor until actually reading it.
    5343             :          */
    5344          30 :         nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
    5345          30 :         if (nvme_ctrlr->copied_ana_desc == NULL) {
    5346           0 :                 SPDK_ERRLOG("could not allocate a buffer to parse ANA descriptor\n");
    5347           0 :                 return -ENOMEM;
    5348             :         }
    5349             : 
    5350          30 :         nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
    5351             : 
    5352          30 :         nvme_ctrlr->probe_ctx = ctx;
    5353             : 
    5354             :         /* Then, set the read size only to include the current active namespaces. */
    5355          30 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    5356             : 
    5357          30 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    5358           0 :                 SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    5359             :                             ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    5360           0 :                 return -EINVAL;
    5361             :         }
    5362             : 
    5363          30 :         return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
    5364             :                                                 SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    5365             :                                                 SPDK_NVME_GLOBAL_NS_TAG,
    5366          30 :                                                 nvme_ctrlr->ana_log_page,
    5367             :                                                 ana_log_page_size, 0,
    5368             :                                                 nvme_ctrlr_init_ana_log_page_done,
    5369             :                                                 nvme_ctrlr);
    5370             : }
    5371             : 
    5372             : /* hostnqn and subnqn were already verified before attaching a controller.
    5373             :  * Hence check only the multipath capability and cntlid here.
    5374             :  */
    5375             : static bool
    5376          16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
    5377             : {
    5378             :         struct nvme_ctrlr *tmp;
    5379             :         const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
    5380             : 
    5381          16 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5382             : 
    5383          16 :         if (!cdata->cmic.multi_ctrlr) {
    5384           0 :                 SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5385           0 :                 return false;
    5386             :         }
    5387             : 
    5388          33 :         TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
    5389          18 :                 tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
    5390             : 
    5391          18 :                 if (!tmp_cdata->cmic.multi_ctrlr) {
    5392           0 :                         SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5393           0 :                         return false;
    5394             :                 }
    5395          18 :                 if (cdata->cntlid == tmp_cdata->cntlid) {
    5396           1 :                         SPDK_ERRLOG("cntlid %u are duplicated.\n", tmp_cdata->cntlid);
    5397           1 :                         return false;
    5398             :                 }
    5399             :         }
    5400             : 
    5401          15 :         return true;
    5402             : }
    5403             : 
    5404             : static int
    5405          60 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
    5406             : {
    5407             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    5408          60 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5409          60 :         int rc = 0;
    5410             : 
    5411          60 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    5412             : 
    5413          60 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    5414          60 :         if (nbdev_ctrlr != NULL) {
    5415          16 :                 if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
    5416           1 :                         rc = -EINVAL;
    5417           1 :                         goto exit;
    5418             :                 }
    5419             :         } else {
    5420          44 :                 nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
    5421          44 :                 if (nbdev_ctrlr == NULL) {
    5422           0 :                         SPDK_ERRLOG("Failed to allocate nvme_bdev_ctrlr.\n");
    5423           0 :                         rc = -ENOMEM;
    5424           0 :                         goto exit;
    5425             :                 }
    5426          44 :                 nbdev_ctrlr->name = strdup(name);
    5427          44 :                 if (nbdev_ctrlr->name == NULL) {
    5428           0 :                         SPDK_ERRLOG("Failed to allocate name of nvme_bdev_ctrlr.\n");
    5429           0 :                         free(nbdev_ctrlr);
    5430           0 :                         goto exit;
    5431             :                 }
    5432          44 :                 TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
    5433          44 :                 TAILQ_INIT(&nbdev_ctrlr->bdevs);
    5434          44 :                 TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
    5435             :         }
    5436          59 :         nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
    5437          59 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
    5438          60 : exit:
    5439          60 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    5440          60 :         return rc;
    5441             : }
    5442             : 
    5443             : static int
    5444          60 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
    5445             :                   const char *name,
    5446             :                   const struct spdk_nvme_transport_id *trid,
    5447             :                   struct nvme_async_probe_ctx *ctx)
    5448             : {
    5449             :         struct nvme_ctrlr *nvme_ctrlr;
    5450             :         struct nvme_path_id *path_id;
    5451             :         const struct spdk_nvme_ctrlr_data *cdata;
    5452             :         int rc;
    5453             : 
    5454          60 :         nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
    5455          60 :         if (nvme_ctrlr == NULL) {
    5456           0 :                 SPDK_ERRLOG("Failed to allocate device struct\n");
    5457           0 :                 return -ENOMEM;
    5458             :         }
    5459             : 
    5460          60 :         rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
    5461          60 :         if (rc != 0) {
    5462           0 :                 free(nvme_ctrlr);
    5463           0 :                 return rc;
    5464             :         }
    5465             : 
    5466          60 :         TAILQ_INIT(&nvme_ctrlr->trids);
    5467          60 :         RB_INIT(&nvme_ctrlr->namespaces);
    5468             : 
    5469             :         /* Get another reference to the key, so the first one can be released from probe_ctx */
    5470          60 :         if (ctx != NULL) {
    5471          46 :                 if (ctx->drv_opts.tls_psk != NULL) {
    5472           0 :                         nvme_ctrlr->psk = spdk_keyring_get_key(
    5473             :                                                   spdk_key_get_name(ctx->drv_opts.tls_psk));
    5474           0 :                         if (nvme_ctrlr->psk == NULL) {
    5475             :                                 /* Could only happen if the key was removed in the meantime */
    5476           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5477             :                                             spdk_key_get_name(ctx->drv_opts.tls_psk));
    5478           0 :                                 rc = -ENOKEY;
    5479           0 :                                 goto err;
    5480             :                         }
    5481             :                 }
    5482             : 
    5483          46 :                 if (ctx->drv_opts.dhchap_key != NULL) {
    5484           0 :                         nvme_ctrlr->dhchap_key = spdk_keyring_get_key(
    5485             :                                                          spdk_key_get_name(ctx->drv_opts.dhchap_key));
    5486           0 :                         if (nvme_ctrlr->dhchap_key == NULL) {
    5487           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5488             :                                             spdk_key_get_name(ctx->drv_opts.dhchap_key));
    5489           0 :                                 rc = -ENOKEY;
    5490           0 :                                 goto err;
    5491             :                         }
    5492             :                 }
    5493             : 
    5494          46 :                 if (ctx->drv_opts.dhchap_ctrlr_key != NULL) {
    5495           0 :                         nvme_ctrlr->dhchap_ctrlr_key =
    5496           0 :                                 spdk_keyring_get_key(
    5497             :                                         spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
    5498           0 :                         if (nvme_ctrlr->dhchap_ctrlr_key == NULL) {
    5499           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5500             :                                             spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
    5501           0 :                                 rc = -ENOKEY;
    5502           0 :                                 goto err;
    5503             :                         }
    5504             :                 }
    5505             :         }
    5506             : 
    5507          60 :         path_id = calloc(1, sizeof(*path_id));
    5508          60 :         if (path_id == NULL) {
    5509           0 :                 SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
    5510           0 :                 rc = -ENOMEM;
    5511           0 :                 goto err;
    5512             :         }
    5513             : 
    5514          60 :         path_id->trid = *trid;
    5515          60 :         if (ctx != NULL) {
    5516          46 :                 memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
    5517          46 :                 memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
    5518             :         }
    5519          60 :         nvme_ctrlr->active_path_id = path_id;
    5520          60 :         TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
    5521             : 
    5522          60 :         nvme_ctrlr->thread = spdk_get_thread();
    5523          60 :         nvme_ctrlr->ctrlr = ctrlr;
    5524          60 :         nvme_ctrlr->ref = 1;
    5525             : 
    5526          60 :         if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
    5527           0 :                 SPDK_ERRLOG("OCSSDs are not supported");
    5528           0 :                 rc = -ENOTSUP;
    5529           0 :                 goto err;
    5530             :         }
    5531             : 
    5532          60 :         if (ctx != NULL) {
    5533          46 :                 memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
    5534             :         } else {
    5535          14 :                 bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
    5536             :         }
    5537             : 
    5538          60 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
    5539             :                                           g_opts.nvme_adminq_poll_period_us);
    5540             : 
    5541          60 :         if (g_opts.timeout_us > 0) {
    5542             :                 /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
    5543             :                 /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
    5544           0 :                 uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
    5545           0 :                                           g_opts.timeout_us : g_opts.timeout_admin_us;
    5546           0 :                 spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
    5547             :                                 adm_timeout_us, timeout_cb, nvme_ctrlr);
    5548             :         }
    5549             : 
    5550          60 :         spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
    5551          60 :         spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
    5552             : 
    5553          60 :         if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    5554             :             SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
    5555           0 :                 nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
    5556             :         }
    5557             : 
    5558          60 :         rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
    5559          60 :         if (rc != 0) {
    5560           1 :                 goto err;
    5561             :         }
    5562             : 
    5563          59 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5564             : 
    5565          59 :         if (cdata->cmic.ana_reporting) {
    5566          30 :                 rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
    5567          30 :                 if (rc == 0) {
    5568          30 :                         return 0;
    5569             :                 }
    5570             :         } else {
    5571          29 :                 nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    5572          29 :                 return 0;
    5573             :         }
    5574             : 
    5575           1 : err:
    5576           1 :         nvme_ctrlr_delete(nvme_ctrlr);
    5577           1 :         return rc;
    5578             : }
    5579             : 
    5580             : void
    5581          56 : bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts)
    5582             : {
    5583          56 :         opts->prchk_flags = 0;
    5584          56 :         opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
    5585          56 :         opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
    5586          56 :         opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
    5587          56 : }
    5588             : 
    5589             : static void
    5590           0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    5591             :           struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
    5592             : {
    5593             :         char *name;
    5594             : 
    5595           0 :         name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
    5596           0 :         if (!name) {
    5597           0 :                 SPDK_ERRLOG("Failed to assign name to NVMe device\n");
    5598           0 :                 return;
    5599             :         }
    5600             : 
    5601           0 :         if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
    5602           0 :                 SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
    5603             :         } else {
    5604           0 :                 SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
    5605             :         }
    5606             : 
    5607           0 :         free(name);
    5608             : }
    5609             : 
    /*
     * Second stage of controller deletion: depopulate the controller's
     * namespaces, then release the nvme_ctrlr. ctx is the struct nvme_ctrlr.
     */
    static void
    _nvme_ctrlr_destruct(void *ctx)
    {
            struct nvme_ctrlr *doomed_ctrlr = ctx;

            nvme_ctrlr_depopulate_namespaces(doomed_ctrlr);
            nvme_ctrlr_release(doomed_ctrlr);
    }
    5618             : 
    5619             : static int
    5620          56 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    5621             : {
    5622             :         struct nvme_probe_skip_entry *entry;
    5623             : 
    5624             :         /* The controller's destruction was already started */
    5625          56 :         if (nvme_ctrlr->destruct) {
    5626           0 :                 return -EALREADY;
    5627             :         }
    5628             : 
    5629          56 :         if (!hotplug &&
    5630          56 :             nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
    5631           0 :                 entry = calloc(1, sizeof(*entry));
    5632           0 :                 if (!entry) {
    5633           0 :                         return -ENOMEM;
    5634             :                 }
    5635           0 :                 entry->trid = nvme_ctrlr->active_path_id->trid;
    5636           0 :                 TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
    5637             :         }
    5638             : 
    5639          56 :         nvme_ctrlr->destruct = true;
    5640          56 :         return 0;
    5641             : }
    5642             : 
    5643             : static int
    5644           2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    5645             : {
    5646             :         int rc;
    5647             : 
    5648           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5649           2 :         rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
    5650           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5651             : 
    5652           2 :         if (rc == 0) {
    5653           2 :                 _nvme_ctrlr_destruct(nvme_ctrlr);
    5654           0 :         } else if (rc == -EALREADY) {
    5655           0 :                 rc = 0;
    5656             :         }
    5657             : 
    5658           2 :         return rc;
    5659             : }
    5660             : 
    /*
     * Removal callback registered through spdk_nvme_ctrlr_set_remove_cb().
     * cb_ctx is the struct nvme_ctrlr; it is deleted with hotplug == true.
     * The return value of the deletion is intentionally not propagated because
     * the callback has no way to report it.
     */
    static void
    remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
    {
            struct nvme_ctrlr *removed_ctrlr = cb_ctx;

            bdev_nvme_delete_ctrlr(removed_ctrlr, true);
    }
    5668             : 
    5669             : static int
    5670           0 : bdev_nvme_hotplug_probe(void *arg)
    5671             : {
    5672           0 :         if (g_hotplug_probe_ctx == NULL) {
    5673           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    5674           0 :                 return SPDK_POLLER_IDLE;
    5675             :         }
    5676             : 
    5677           0 :         if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
    5678           0 :                 g_hotplug_probe_ctx = NULL;
    5679           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    5680             :         }
    5681             : 
    5682           0 :         return SPDK_POLLER_BUSY;
    5683             : }
    5684             : 
    5685             : static int
    5686           0 : bdev_nvme_hotplug(void *arg)
    5687             : {
    5688           0 :         struct spdk_nvme_transport_id trid_pcie;
    5689             : 
    5690           0 :         if (g_hotplug_probe_ctx) {
    5691           0 :                 return SPDK_POLLER_BUSY;
    5692             :         }
    5693             : 
    5694           0 :         memset(&trid_pcie, 0, sizeof(trid_pcie));
    5695           0 :         spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
    5696             : 
    5697           0 :         g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
    5698             :                               hotplug_probe_cb, attach_cb, NULL);
    5699             : 
    5700           0 :         if (g_hotplug_probe_ctx) {
    5701           0 :                 assert(g_hotplug_probe_poller == NULL);
    5702           0 :                 g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
    5703             :         }
    5704             : 
    5705           0 :         return SPDK_POLLER_BUSY;
    5706             : }
    5707             : 
    5708             : void
    5709           0 : bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
    5710             : {
    5711           0 :         *opts = g_opts;
    5712           0 : }
    5713             : 
    5714             : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
    5715             :                 uint32_t reconnect_delay_sec,
    5716             :                 uint32_t fast_io_fail_timeout_sec);
    5717             : 
    5718             : static int
    5719           0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
    5720             : {
    5721           0 :         if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
    5722             :                 /* Can't set timeout_admin_us without also setting timeout_us */
    5723           0 :                 SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
    5724           0 :                 return -EINVAL;
    5725             :         }
    5726             : 
    5727           0 :         if (opts->bdev_retry_count < -1) {
    5728           0 :                 SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
    5729           0 :                 return -EINVAL;
    5730             :         }
    5731             : 
    5732           0 :         if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
    5733           0 :                         opts->reconnect_delay_sec,
    5734           0 :                         opts->fast_io_fail_timeout_sec)) {
    5735           0 :                 return -EINVAL;
    5736             :         }
    5737             : 
    5738           0 :         return 0;
    5739             : }
    5740             : 
    5741             : int
    5742           0 : bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
    5743             : {
    5744             :         int ret;
    5745             : 
    5746           0 :         ret = bdev_nvme_validate_opts(opts);
    5747           0 :         if (ret) {
    5748           0 :                 SPDK_WARNLOG("Failed to set nvme opts.\n");
    5749           0 :                 return ret;
    5750             :         }
    5751             : 
    5752           0 :         if (g_bdev_nvme_init_thread != NULL) {
    5753           0 :                 if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    5754           0 :                         return -EPERM;
    5755             :                 }
    5756             :         }
    5757             : 
    5758           0 :         if (opts->rdma_srq_size != 0 ||
    5759           0 :             opts->rdma_max_cq_size != 0 ||
    5760           0 :             opts->rdma_cm_event_timeout_ms != 0) {
    5761           0 :                 struct spdk_nvme_transport_opts drv_opts;
    5762             : 
    5763           0 :                 spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
    5764           0 :                 if (opts->rdma_srq_size != 0) {
    5765           0 :                         drv_opts.rdma_srq_size = opts->rdma_srq_size;
    5766             :                 }
    5767           0 :                 if (opts->rdma_max_cq_size != 0) {
    5768           0 :                         drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
    5769             :                 }
    5770           0 :                 if (opts->rdma_cm_event_timeout_ms != 0) {
    5771           0 :                         drv_opts.rdma_cm_event_timeout_ms = opts->rdma_cm_event_timeout_ms;
    5772             :                 }
    5773             : 
    5774           0 :                 ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
    5775           0 :                 if (ret) {
    5776           0 :                         SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
    5777           0 :                         return ret;
    5778             :                 }
    5779             :         }
    5780             : 
    5781           0 :         g_opts = *opts;
    5782             : 
    5783           0 :         return 0;
    5784             : }
    5785             : 
    5786             : struct set_nvme_hotplug_ctx {
    5787             :         uint64_t period_us;
    5788             :         bool enabled;
    5789             :         spdk_msg_fn fn;
    5790             :         void *fn_ctx;
    5791             : };
    5792             : 
    5793             : static void
    5794           0 : set_nvme_hotplug_period_cb(void *_ctx)
    5795             : {
    5796           0 :         struct set_nvme_hotplug_ctx *ctx = _ctx;
    5797             : 
    5798           0 :         spdk_poller_unregister(&g_hotplug_poller);
    5799           0 :         if (ctx->enabled) {
    5800           0 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
    5801             :         } else {
    5802           0 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
    5803             :                                                         NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
    5804             :         }
    5805             : 
    5806           0 :         g_nvme_hotplug_poll_period_us = ctx->period_us;
    5807           0 :         g_nvme_hotplug_enabled = ctx->enabled;
    5808           0 :         if (ctx->fn) {
    5809           0 :                 ctx->fn(ctx->fn_ctx);
    5810             :         }
    5811             : 
    5812           0 :         free(ctx);
    5813           0 : }
    5814             : 
    5815             : int
    5816           0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
    5817             : {
    5818             :         struct set_nvme_hotplug_ctx *ctx;
    5819             : 
    5820           0 :         if (enabled == true && !spdk_process_is_primary()) {
    5821           0 :                 return -EPERM;
    5822             :         }
    5823             : 
    5824           0 :         ctx = calloc(1, sizeof(*ctx));
    5825           0 :         if (ctx == NULL) {
    5826           0 :                 return -ENOMEM;
    5827             :         }
    5828             : 
    5829           0 :         period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
    5830           0 :         ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
    5831           0 :         ctx->enabled = enabled;
    5832           0 :         ctx->fn = cb;
    5833           0 :         ctx->fn_ctx = cb_ctx;
    5834             : 
    5835           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
    5836           0 :         return 0;
    5837             : }
    5838             : 
    5839             : static void
    5840          45 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
    5841             :                                     struct nvme_async_probe_ctx *ctx)
    5842             : {
    5843             :         struct nvme_ns  *nvme_ns;
    5844             :         struct nvme_bdev        *nvme_bdev;
    5845             :         size_t                  j;
    5846             : 
    5847          45 :         assert(nvme_ctrlr != NULL);
    5848             : 
    5849          45 :         if (ctx->names == NULL) {
    5850           0 :                 ctx->reported_bdevs = 0;
    5851           0 :                 populate_namespaces_cb(ctx, 0);
    5852           0 :                 return;
    5853             :         }
    5854             : 
    5855             :         /*
    5856             :          * Report the new bdevs that were created in this call.
    5857             :          * There can be more than one bdev per NVMe controller.
    5858             :          */
    5859          45 :         j = 0;
    5860          45 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    5861          92 :         while (nvme_ns != NULL) {
    5862          47 :                 nvme_bdev = nvme_ns->bdev;
    5863          47 :                 if (j < ctx->max_bdevs) {
    5864          47 :                         ctx->names[j] = nvme_bdev->disk.name;
    5865          47 :                         j++;
    5866             :                 } else {
    5867           0 :                         SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n",
    5868             :                                     ctx->max_bdevs);
    5869           0 :                         ctx->reported_bdevs = 0;
    5870           0 :                         populate_namespaces_cb(ctx, -ERANGE);
    5871           0 :                         return;
    5872             :                 }
    5873             : 
    5874          47 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    5875             :         }
    5876             : 
    5877          45 :         ctx->reported_bdevs = j;
    5878          45 :         populate_namespaces_cb(ctx, 0);
    5879             : }
    5880             : 
    5881             : static int
    5882           9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    5883             :                                struct spdk_nvme_ctrlr *new_ctrlr,
    5884             :                                struct spdk_nvme_transport_id *trid)
    5885             : {
    5886             :         struct nvme_path_id *tmp_trid;
    5887             : 
    5888           9 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    5889           0 :                 SPDK_ERRLOG("PCIe failover is not supported.\n");
    5890           0 :                 return -ENOTSUP;
    5891             :         }
    5892             : 
    5893             :         /* Currently we only support failover to the same transport type. */
    5894           9 :         if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
    5895           0 :                 SPDK_WARNLOG("Failover from trtype: %s to a different trtype: %s is not supported currently\n",
    5896             :                              spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
    5897             :                              spdk_nvme_transport_id_trtype_str(trid->trtype));
    5898           0 :                 return -EINVAL;
    5899             :         }
    5900             : 
    5901             : 
    5902             :         /* Currently we only support failover to the same NQN. */
    5903           9 :         if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
    5904           0 :                 SPDK_WARNLOG("Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
    5905             :                              nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
    5906           0 :                 return -EINVAL;
    5907             :         }
    5908             : 
    5909             :         /* Skip all the other checks if we've already registered this path. */
    5910          21 :         TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
    5911          12 :                 if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
    5912           0 :                         SPDK_WARNLOG("This path (traddr: %s subnqn: %s) is already registered\n", trid->traddr,
    5913             :                                      trid->subnqn);
    5914           0 :                         return -EALREADY;
    5915             :                 }
    5916             :         }
    5917             : 
    5918           9 :         return 0;
    5919             : }
    5920             : 
    5921             : static int
    5922           9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
    5923             :                                     struct spdk_nvme_ctrlr *new_ctrlr)
    5924             : {
    5925             :         struct nvme_ns *nvme_ns;
    5926             :         struct spdk_nvme_ns *new_ns;
    5927             : 
    5928           9 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    5929           9 :         while (nvme_ns != NULL) {
    5930           0 :                 new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
    5931           0 :                 assert(new_ns != NULL);
    5932             : 
    5933           0 :                 if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
    5934           0 :                         return -EINVAL;
    5935             :                 }
    5936             : 
    5937           0 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    5938             :         }
    5939             : 
    5940           9 :         return 0;
    5941             : }
    5942             : 
    5943             : static int
    5944           9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    5945             :                               struct spdk_nvme_transport_id *trid)
    5946             : {
    5947             :         struct nvme_path_id *active_id, *new_trid, *tmp_trid;
    5948             : 
    5949           9 :         new_trid = calloc(1, sizeof(*new_trid));
    5950           9 :         if (new_trid == NULL) {
    5951           0 :                 return -ENOMEM;
    5952             :         }
    5953           9 :         new_trid->trid = *trid;
    5954             : 
    5955           9 :         active_id = nvme_ctrlr->active_path_id;
    5956           9 :         assert(active_id != NULL);
    5957           9 :         assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
    5958             : 
    5959             :         /* Skip the active trid not to replace it until it is failed. */
    5960           9 :         tmp_trid = TAILQ_NEXT(active_id, link);
    5961           9 :         if (tmp_trid == NULL) {
    5962           6 :                 goto add_tail;
    5963             :         }
    5964             : 
    5965             :         /* It means the trid is faled if its last failed time is non-zero.
    5966             :          * Insert the new alternate trid before any failed trid.
    5967             :          */
    5968           5 :         TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
    5969           3 :                 if (tmp_trid->last_failed_tsc != 0) {
    5970           1 :                         TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
    5971           1 :                         return 0;
    5972             :                 }
    5973             :         }
    5974             : 
    5975           2 : add_tail:
    5976           8 :         TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
    5977           8 :         return 0;
    5978             : }
    5979             : 
    5980             : /* This is the case that a secondary path is added to an existing
    5981             :  * nvme_ctrlr for failover. After checking if it can access the same
    5982             :  * namespaces as the primary path, it is disconnected until failover occurs.
    5983             :  */
    5984             : static int
    5985           9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    5986             :                              struct spdk_nvme_ctrlr *new_ctrlr,
    5987             :                              struct spdk_nvme_transport_id *trid)
    5988             : {
    5989             :         int rc;
    5990             : 
    5991           9 :         assert(nvme_ctrlr != NULL);
    5992             : 
    5993           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5994             : 
    5995           9 :         rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
    5996           9 :         if (rc != 0) {
    5997           0 :                 goto exit;
    5998             :         }
    5999             : 
    6000           9 :         rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
    6001           9 :         if (rc != 0) {
    6002           0 :                 goto exit;
    6003             :         }
    6004             : 
    6005           9 :         rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
    6006             : 
    6007           9 : exit:
    6008           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6009             : 
    6010           9 :         spdk_nvme_detach(new_ctrlr);
    6011             : 
    6012           9 :         return rc;
    6013             : }
    6014             : 
    6015             : static void
    6016          46 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6017             :                   struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    6018             : {
    6019          46 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6020             :         struct nvme_async_probe_ctx *ctx;
    6021             :         int rc;
    6022             : 
    6023          46 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
    6024          46 :         ctx->ctrlr_attached = true;
    6025             : 
    6026          46 :         rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
    6027          46 :         if (rc != 0) {
    6028           1 :                 ctx->reported_bdevs = 0;
    6029           1 :                 populate_namespaces_cb(ctx, rc);
    6030             :         }
    6031          46 : }
    6032             : 
    6033             : static void
    6034           4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6035             :                         struct spdk_nvme_ctrlr *ctrlr,
    6036             :                         const struct spdk_nvme_ctrlr_opts *opts)
    6037             : {
    6038           4 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6039             :         struct nvme_ctrlr *nvme_ctrlr;
    6040             :         struct nvme_async_probe_ctx *ctx;
    6041             :         int rc;
    6042             : 
    6043           4 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
    6044           4 :         ctx->ctrlr_attached = true;
    6045             : 
    6046           4 :         nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
    6047           4 :         if (nvme_ctrlr) {
    6048           4 :                 rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
    6049             :         } else {
    6050           0 :                 rc = -ENODEV;
    6051             :         }
    6052             : 
    6053           4 :         ctx->reported_bdevs = 0;
    6054           4 :         populate_namespaces_cb(ctx, rc);
    6055           4 : }
    6056             : 
    6057             : static int
    6058          51 : bdev_nvme_async_poll(void *arg)
    6059             : {
    6060          51 :         struct nvme_async_probe_ctx     *ctx = arg;
    6061             :         int                             rc;
    6062             : 
    6063          51 :         rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    6064          51 :         if (spdk_unlikely(rc != -EAGAIN)) {
    6065          51 :                 ctx->probe_done = true;
    6066          51 :                 spdk_poller_unregister(&ctx->poller);
    6067          51 :                 if (!ctx->ctrlr_attached) {
    6068             :                         /* The probe is done, but no controller was attached.
    6069             :                          * That means we had a failure, so report -EIO back to
    6070             :                          * the caller (usually the RPC). populate_namespaces_cb()
    6071             :                          * will take care of freeing the nvme_async_probe_ctx.
    6072             :                          */
    6073           1 :                         ctx->reported_bdevs = 0;
    6074           1 :                         populate_namespaces_cb(ctx, -EIO);
    6075          50 :                 } else if (ctx->namespaces_populated) {
    6076             :                         /* The namespaces for the attached controller were all
    6077             :                          * populated and the response was already sent to the
    6078             :                          * caller (usually the RPC).  So free the context here.
    6079             :                          */
    6080          20 :                         free_nvme_async_probe_ctx(ctx);
    6081             :                 }
    6082             :         }
    6083             : 
    6084          51 :         return SPDK_POLLER_BUSY;
    6085             : }
    6086             : 
    /*
     * Check that the three reconnect/failover timers form a consistent set.
     *
     * Rules enforced (an error is logged for the first one violated):
     *  - ctrlr_loss_timeout_sec must be >= -1.
     *  - If ctrlr_loss_timeout_sec is 0, the other two must be 0 as well.
     *  - Otherwise reconnect_delay_sec must be non-zero, and
     *    fast_io_fail_timeout_sec, when non-zero, must be >= reconnect_delay_sec.
     *  - If ctrlr_loss_timeout_sec is positive, neither reconnect_delay_sec nor
     *    a non-zero fast_io_fail_timeout_sec may exceed it.
     *
     * Returns true if the combination is valid, false otherwise.
     */
    static bool
    bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
                    uint32_t reconnect_delay_sec,
                    uint32_t fast_io_fail_timeout_sec)
    {
            if (ctrlr_loss_timeout_sec < -1) {
                    SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
                    return false;
            }

            if (ctrlr_loss_timeout_sec == 0) {
                    if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
                            SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
                            return false;
                    }
                    return true;
            }

            /* From here on ctrlr_loss_timeout_sec is either -1 or positive. The
             * upper-bound checks only apply to the positive case, where the cast
             * to uint32_t is value-preserving.
             */
            if (reconnect_delay_sec == 0) {
                    SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
                    return false;
            }

            if (ctrlr_loss_timeout_sec > 0 &&
                reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
                    SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
                    return false;
            }

            if (fast_io_fail_timeout_sec != 0) {
                    if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
                            SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
                            return false;
                    }

                    if (ctrlr_loss_timeout_sec > 0 &&
                        fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
                            SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
                            return false;
                    }
            }

            return true;
    }
    6127             : 
    /*
     * Read the whole PSK file fname into buf.
     *
     * fname - path of the key file. The file is rejected if any of the mode
     *         bits in 0177 (owner execute, any group or other permission) is set.
     * buf   - destination buffer; zeroed before reading so the result is always
     *         NUL-terminated.
     * bufsz - size of buf; the file must be strictly smaller than this.
     *
     * Returns 0 on success, -EACCES if the file cannot be stat'ed, -EPERM for
     * unacceptable permissions, or -EINVAL if the file is too long, cannot be
     * opened, or cannot be read completely.
     */
    static int
    bdev_nvme_load_psk(const char *fname, char *buf, size_t bufsz)
    {
            FILE *psk_file;
            struct stat statbuf;
            size_t psk_len, nread;
    #define TCP_PSK_INVALID_PERMISSIONS 0177

            if (stat(fname, &statbuf) != 0) {
                    SPDK_ERRLOG("Could not read permissions for PSK file\n");
                    return -EACCES;
            }

            if ((statbuf.st_mode & TCP_PSK_INVALID_PERMISSIONS) != 0) {
                    SPDK_ERRLOG("Incorrect permissions for PSK file\n");
                    return -EPERM;
            }
            if ((size_t)statbuf.st_size >= bufsz) {
                    SPDK_ERRLOG("Invalid PSK: too long\n");
                    return -EINVAL;
            }
            /* Known to be smaller than bufsz, so it fits in a size_t. */
            psk_len = (size_t)statbuf.st_size;

            psk_file = fopen(fname, "r");
            if (psk_file == NULL) {
                    SPDK_ERRLOG("Could not open PSK file\n");
                    return -EINVAL;
            }

            memset(buf, 0, bufsz);

            /* fread() returns a size_t item count; keep it in that type instead of
             * narrowing it to int and comparing it against the signed st_size.
             */
            nread = fread(buf, 1, psk_len, psk_file);
            fclose(psk_file);

            if (nread != psk_len) {
                    SPDK_ERRLOG("Failed to read PSK\n");
                    return -EINVAL;
            }

            return 0;
    }
    6166             : 
                       /*
                        * Start an asynchronous connect to the NVMe controller described by trid.
                        *
                        * A struct nvme_async_probe_ctx is allocated to carry the request. It keeps
                        * the base_name and names pointers as given (they are not copied here), a
                        * copy of *trid, a copy of *bdev_opts (or the defaults when bdev_opts is NULL)
                        * and a copy of *drv_opts in which several fields are overridden from g_opts.
                        * When a nvme_bdev_ctrlr named base_name already exists and multipath is
                        * false, connect_set_failover_cb is used as the attach callback; otherwise
                        * connect_attach_cb is used.
                        *
                        * Returns 0 once the connect was started and the poller was registered, or
                        *   -EEXIST  nvme_ctrlr_get() already finds a controller for trid/hostnqn,
                        *   -EINVAL  base_name is empty or not shorter than SPDK_CONTROLLER_NAME_MAX,
                        *            or the I/O error resiliency parameters in bdev_opts are rejected,
                        *   -ENOMEM  the context could not be allocated,
                        *   -ENOKEY  a DH-HMAC-CHAP key name is not found in the keyring,
                        *   -ENODEV  spdk_nvme_connect_async() returned NULL,
                        * or the error returned by bdev_nvme_load_psk().
                        *
                        * NOTE(review): cb_fn/cb_ctx are only stored here; presumably they are invoked
                        * from the attach path driven by bdev_nvme_async_poll - confirm.
                        */
    6167             : int
    6168          51 : bdev_nvme_create(struct spdk_nvme_transport_id *trid,
    6169             :                  const char *base_name,
    6170             :                  const char **names,
    6171             :                  uint32_t count,
    6172             :                  spdk_bdev_create_nvme_fn cb_fn,
    6173             :                  void *cb_ctx,
    6174             :                  struct spdk_nvme_ctrlr_opts *drv_opts,
    6175             :                  struct nvme_ctrlr_opts *bdev_opts,
    6176             :                  bool multipath)
    6177             : {
    6178             :         struct nvme_probe_skip_entry *entry, *tmp;
    6179             :         struct nvme_async_probe_ctx *ctx;
    6180             :         spdk_nvme_attach_cb attach_cb;
    6181             :         int rc, len;
    6182             : 
    6183             :         /* TODO expand this check to include both the host and target TRIDs.
    6184             :          * Only if both are the same should we fail.
    6185             :          */
    6186          51 :         if (nvme_ctrlr_get(trid, drv_opts->hostnqn) != NULL) {
    6187           0 :                 SPDK_ERRLOG("A controller with the provided trid (traddr: %s, hostnqn: %s) "
    6188             :                             "already exists.\n", trid->traddr, drv_opts->hostnqn);
    6189           0 :                 return -EEXIST;
    6190             :         }
    6191             : 
                               /* strnlen() stops at SPDK_CONTROLLER_NAME_MAX, so a result equal to
                                * that limit means the name does not fit.
                                */
    6192          51 :         len = strnlen(base_name, SPDK_CONTROLLER_NAME_MAX);
    6193             : 
    6194          51 :         if (len == 0 || len == SPDK_CONTROLLER_NAME_MAX) {
    6195           0 :                 SPDK_ERRLOG("controller name must be between 1 and %d characters\n", SPDK_CONTROLLER_NAME_MAX - 1);
    6196           0 :                 return -EINVAL;
    6197             :         }
    6198             : 
    6199          51 :         if (bdev_opts != NULL &&
    6200           9 :             !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
    6201             :                             bdev_opts->reconnect_delay_sec,
    6202             :                             bdev_opts->fast_io_fail_timeout_sec)) {
    6203           0 :                 return -EINVAL;
    6204             :         }
    6205             : 
                               /* All validation that needs no context is done; from here on every
                                * error path has to release ctx.
                                */
    6206          51 :         ctx = calloc(1, sizeof(*ctx));
    6207          51 :         if (!ctx) {
    6208           0 :                 return -ENOMEM;
    6209             :         }
    6210          51 :         ctx->base_name = base_name;
    6211          51 :         ctx->names = names;
    6212          51 :         ctx->max_bdevs = count;
    6213          51 :         ctx->cb_fn = cb_fn;
    6214          51 :         ctx->cb_ctx = cb_ctx;
    6215          51 :         ctx->trid = *trid;
    6216             : 
    6217          51 :         if (bdev_opts) {
    6218           9 :                 memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    6219             :         } else {
    6220          42 :                 bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
    6221             :         }
    6222             : 
                               /* For PCIe, drop the first entry of g_skipped_nvme_ctrlrs whose trid
                                * compares equal to the one being attached.
                                */
    6223          51 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6224           0 :                 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
    6225           0 :                         if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    6226           0 :                                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    6227           0 :                                 free(entry);
    6228           0 :                                 break;
    6229             :                         }
    6230             :                 }
    6231             :         }
    6232             : 
                               /* Work on a private copy of the driver options; the fields below
                                * always take the module-wide values, whatever the caller passed.
                                */
    6233          51 :         memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    6234          51 :         ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
    6235          51 :         ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
    6236          51 :         ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
    6237          51 :         ctx->drv_opts.disable_read_ana_log_page = true;
    6238          51 :         ctx->drv_opts.transport_tos = g_opts.transport_tos;
    6239             : 
                               /* bdev_opts.psk is first tried as a keyring key name and, if no such
                                * key exists, as the path of a file holding the PSK.
                                */
    6240          51 :         if (ctx->bdev_opts.psk[0] != '\0') {
    6241             :                 /* Try to use the keyring first */
    6242           0 :                 ctx->drv_opts.tls_psk = spdk_keyring_get_key(ctx->bdev_opts.psk);
    6243           0 :                 if (ctx->drv_opts.tls_psk == NULL) {
    6244           0 :                         rc = bdev_nvme_load_psk(ctx->bdev_opts.psk,
    6245           0 :                                                 ctx->drv_opts.psk, sizeof(ctx->drv_opts.psk));
    6246           0 :                         if (rc != 0) {
    6247           0 :                                 SPDK_ERRLOG("Could not load PSK from %s\n", ctx->bdev_opts.psk);
    6248           0 :                                 free_nvme_async_probe_ctx(ctx);
    6249           0 :                                 return rc;
    6250             :                         }
    6251             :                 }
    6252             :         }
    6253             : 
                               /* DH-HMAC-CHAP keys are looked up in the keyring only. The digest and
                                * DH group settings from g_opts are applied only when a host key is set.
                                */
    6254          51 :         if (ctx->bdev_opts.dhchap_key != NULL) {
    6255           0 :                 ctx->drv_opts.dhchap_key = spdk_keyring_get_key(ctx->bdev_opts.dhchap_key);
    6256           0 :                 if (ctx->drv_opts.dhchap_key == NULL) {
    6257           0 :                         SPDK_ERRLOG("Could not load DH-HMAC-CHAP key: %s\n",
    6258             :                                     ctx->bdev_opts.dhchap_key);
    6259           0 :                         free_nvme_async_probe_ctx(ctx);
    6260           0 :                         return -ENOKEY;
    6261             :                 }
    6262             : 
    6263           0 :                 ctx->drv_opts.dhchap_digests = g_opts.dhchap_digests;
    6264           0 :                 ctx->drv_opts.dhchap_dhgroups = g_opts.dhchap_dhgroups;
    6265             :         }
    6266          51 :         if (ctx->bdev_opts.dhchap_ctrlr_key != NULL) {
    6267           0 :                 ctx->drv_opts.dhchap_ctrlr_key =
    6268           0 :                         spdk_keyring_get_key(ctx->bdev_opts.dhchap_ctrlr_key);
    6269           0 :                 if (ctx->drv_opts.dhchap_ctrlr_key == NULL) {
    6270           0 :                         SPDK_ERRLOG("Could not load DH-HMAC-CHAP controller key: %s\n",
    6271             :                                     ctx->bdev_opts.dhchap_ctrlr_key);
    6272           0 :                         free_nvme_async_probe_ctx(ctx);
    6273           0 :                         return -ENOKEY;
    6274             :                 }
    6275             :         }
    6276             : 
                               /* A new name, or an explicit multipath request, attaches normally;
                                * an existing name without multipath uses the failover callback.
                                */
    6277          51 :         if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || multipath) {
    6278          47 :                 attach_cb = connect_attach_cb;
    6279             :         } else {
    6280           4 :                 attach_cb = connect_set_failover_cb;
    6281             :         }
    6282             : 
    6283          51 :         ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
    6284          51 :         if (ctx->probe_ctx == NULL) {
    6285           0 :                 SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
    6286           0 :                 free_nvme_async_probe_ctx(ctx);
    6287           0 :                 return -ENODEV;
    6288             :         }
                               /* NOTE(review): the result of SPDK_POLLER_REGISTER is not checked. */
    6289          51 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
    6290             : 
    6291          51 :         return 0;
    6292             : }
    6293             : 
                       /*
                        * State for waiting until a deleted NVMe path has disappeared. It is
                        * allocated by bdev_nvme_delete() when a completion callback is supplied,
                        * polled by bdev_nvme_delete_complete_poll() and released with
                        * free_bdev_nvme_delete_ctx().
                        */
    6294             : struct bdev_nvme_delete_ctx {
                               /* Heap copy (strdup) of the controller name; freed with the context. */
    6295             :         char                        *name;
                               /* Copy of the path filter that was passed to bdev_nvme_delete(). */
    6296             :         struct nvme_path_id         path_id;
                               /* Completion callback and its argument; called with 0 or -ETIMEDOUT. */
    6297             :         bdev_nvme_delete_done_fn    delete_done;
    6298             :         void                        *delete_done_ctx;
                               /* spdk_get_ticks() value after which waiting is given up. */
    6299             :         uint64_t                    timeout_ticks;
                               /* Poller running bdev_nvme_delete_complete_poll(). */
    6300             :         struct spdk_poller          *poller;
    6301             : };
    6302             : 
    6303             : static void
    6304           2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
    6305             : {
    6306           2 :         if (ctx != NULL) {
    6307           1 :                 free(ctx->name);
    6308           1 :                 free(ctx);
    6309             :         }
    6310           2 : }
    6311             : 
    6312             : static bool
    6313          74 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
    6314             : {
    6315          74 :         if (path_id->trid.trtype != 0) {
    6316          21 :                 if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
    6317           0 :                         if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
    6318           0 :                                 return false;
    6319             :                         }
    6320             :                 } else {
    6321          21 :                         if (path_id->trid.trtype != p->trid.trtype) {
    6322           0 :                                 return false;
    6323             :                         }
    6324             :                 }
    6325             :         }
    6326             : 
    6327          74 :         if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
    6328          21 :                 if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
    6329          11 :                         return false;
    6330             :                 }
    6331             :         }
    6332             : 
    6333          63 :         if (path_id->trid.adrfam != 0) {
    6334           0 :                 if (path_id->trid.adrfam != p->trid.adrfam) {
    6335           0 :                         return false;
    6336             :                 }
    6337             :         }
    6338             : 
    6339          63 :         if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
    6340          10 :                 if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
    6341           0 :                         return false;
    6342             :                 }
    6343             :         }
    6344             : 
    6345          63 :         if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
    6346          10 :                 if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
    6347           0 :                         return false;
    6348             :                 }
    6349             :         }
    6350             : 
    6351          63 :         if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
    6352           0 :                 if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
    6353           0 :                         return false;
    6354             :                 }
    6355             :         }
    6356             : 
    6357          63 :         if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
    6358           0 :                 if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
    6359           0 :                         return false;
    6360             :                 }
    6361             :         }
    6362             : 
    6363          63 :         return true;
    6364             : }
    6365             : 
    6366             : static bool
    6367           2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
    6368             : {
    6369             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    6370             :         struct nvme_ctrlr       *ctrlr;
    6371             :         struct nvme_path_id     *p;
    6372             : 
    6373           2 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    6374           2 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    6375           2 :         if (!nbdev_ctrlr) {
    6376           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6377           1 :                 return false;
    6378             :         }
    6379             : 
    6380           1 :         TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    6381           1 :                 pthread_mutex_lock(&ctrlr->mutex);
    6382           1 :                 TAILQ_FOREACH(p, &ctrlr->trids, link) {
    6383           1 :                         if (nvme_path_id_compare(p, path_id)) {
    6384           1 :                                 pthread_mutex_unlock(&ctrlr->mutex);
    6385           1 :                                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6386           1 :                                 return true;
    6387             :                         }
    6388             :                 }
    6389           0 :                 pthread_mutex_unlock(&ctrlr->mutex);
    6390             :         }
    6391           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6392             : 
    6393           0 :         return false;
    6394             : }
    6395             : 
    6396             : static int
    6397           2 : bdev_nvme_delete_complete_poll(void *arg)
    6398             : {
    6399           2 :         struct bdev_nvme_delete_ctx     *ctx = arg;
    6400           2 :         int                             rc = 0;
    6401             : 
    6402           2 :         if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
    6403           1 :                 if (ctx->timeout_ticks > spdk_get_ticks()) {
    6404           1 :                         return SPDK_POLLER_BUSY;
    6405             :                 }
    6406             : 
    6407           0 :                 SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
    6408           0 :                 rc = -ETIMEDOUT;
    6409             :         }
    6410             : 
    6411           1 :         spdk_poller_unregister(&ctx->poller);
    6412             : 
    6413           1 :         ctx->delete_done(ctx->delete_done_ctx, rc);
    6414           1 :         free_bdev_nvme_delete_ctx(ctx);
    6415             : 
    6416           1 :         return SPDK_POLLER_BUSY;
    6417             : }
    6418             : 
                       /*
                        * Remove every path of nvme_ctrlr that matches path_id.
                        *
                        * Paths other than the first list entry are simply unlinked and freed. If
                        * the first entry (the active path) matches as well, the controller is
                        * either deleted (no other path left) or failed over to the next path, and
                        * the follow-up work is sent to the controller's thread.
                        *
                        * Returns 0 when at least one path was removed or the active path is being
                        * handled (-EALREADY from the helpers is reported as 0), -ENXIO when nothing
                        * matched, or the error returned by the delete/failover helper.
                        */
    6419             : static int
    6420          63 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
    6421             : {
    6422             :         struct nvme_path_id     *p, *t;
    6423             :         spdk_msg_fn             msg_fn;
    6424          63 :         int                     rc = -ENXIO;
    6425             : 
    6426          63 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6427             : 
                               /* Walk from the tail towards the head and stop at the first entry, so
                                * only non-first paths are removed inside the loop.
                                */
    6428          73 :         TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
    6429          73 :                 if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
    6430          63 :                         break;
    6431             :                 }
    6432             : 
    6433          10 :                 if (!nvme_path_id_compare(p, path_id)) {
    6434           3 :                         continue;
    6435             :                 }
    6436             : 
    6437             :                 /* We are not using the specified path. */
    6438           7 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
    6439           7 :                 free(p);
    6440           7 :                 rc = 0;
    6441             :         }
    6442             : 
                               /* Here p is the first list entry, or NULL if the list was empty. If
                                * it does not match, report what the loop achieved.
                                */
    6443          63 :         if (p == NULL || !nvme_path_id_compare(p, path_id)) {
    6444           8 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6445           8 :                 return rc;
    6446             :         }
    6447             : 
    6448             :         /* If we made it here, then this path is a match! Now we need to remove it. */
    6449             : 
    6450             :         /* This is the active path in use right now. The active path is always the first in the list. */
    6451          55 :         assert(p == nvme_ctrlr->active_path_id);
    6452             : 
    6453          55 :         if (!TAILQ_NEXT(p, link)) {
    6454             :                 /* The current path is the only path. */
    6455          54 :                 msg_fn = _nvme_ctrlr_destruct;
    6456          54 :                 rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
    6457             :         } else {
    6458             :                 /* There is an alternative path. */
    6459           1 :                 msg_fn = _bdev_nvme_reset_ctrlr;
    6460           1 :                 rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
    6461             :         }
    6462             : 
    6463          55 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6464             : 
                               /* The message is sent only after the mutex has been released and only
                                * when the helper returned 0.
                                */
    6465          55 :         if (rc == 0) {
    6466          55 :                 spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    6467           0 :         } else if (rc == -EALREADY) {
    6468           0 :                 rc = 0;
    6469             :         }
    6470             : 
    6471          55 :         return rc;
    6472             : }
    6473             : 
                       /*
                        * Delete the paths matching path_id from every controller grouped under
                        * the nvme_bdev_ctrlr called name.
                        *
                        * When delete_done is not NULL and at least one path was handled, a poller
                        * is started that calls delete_done(delete_done_ctx, rc) once the path no
                        * longer exists, or with -ETIMEDOUT after 10 * spdk_get_ticks_hz() ticks.
                        *
                        * Returns 0 on success, -EINVAL if name or path_id is NULL, -ENODEV if no
                        * nvme_bdev_ctrlr has that name, -ENXIO if no path matched, -ENOMEM if the
                        * completion context or its poller could not be set up, or the first other
                        * error returned by _bdev_nvme_delete().
                        *
                        * NOTE(review): the -ENOMEM cases occur after the deletion has already been
                        * started, and delete_done is not called in those cases.
                        */
    6474             : int
    6475          48 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
    6476             :                  bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
    6477             : {
    6478             :         struct nvme_bdev_ctrlr          *nbdev_ctrlr;
    6479             :         struct nvme_ctrlr               *nvme_ctrlr, *tmp_nvme_ctrlr;
    6480          48 :         struct bdev_nvme_delete_ctx     *ctx = NULL;
    6481          48 :         int                             rc = -ENXIO, _rc;
    6482             : 
    6483          48 :         if (name == NULL || path_id == NULL) {
    6484           0 :                 rc = -EINVAL;
    6485           0 :                 goto exit;
    6486             :         }
    6487             : 
    6488          48 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    6489             : 
    6490          48 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    6491          48 :         if (nbdev_ctrlr == NULL) {
    6492           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6493             : 
    6494           0 :                 SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
    6495           0 :                 rc = -ENODEV;
    6496           0 :                 goto exit;
    6497             :         }
    6498             : 
                               /* -ENXIO from one controller only means "no match there"; any other
                                * negative result aborts the walk.
                                */
    6499         111 :         TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
    6500          63 :                 _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
    6501          63 :                 if (_rc < 0 && _rc != -ENXIO) {
    6502           0 :                         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6503           0 :                         rc = _rc;
    6504           0 :                         goto exit;
    6505          63 :                 } else if (_rc == 0) {
    6506             :                         /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
    6507             :                          * was deleted successfully. To remember the successful deletion,
    6508             :                          * overwrite rc only if _rc is zero.
    6509             :                          */
    6510          57 :                         rc = 0;
    6511             :                 }
    6512             :         }
    6513             : 
    6514          48 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6515             : 
                               /* Nothing to wait for on failure or when the caller wants no callback. */
    6516          48 :         if (rc != 0 || delete_done == NULL) {
    6517          47 :                 goto exit;
    6518             :         }
    6519             : 
    6520           1 :         ctx = calloc(1, sizeof(*ctx));
    6521           1 :         if (ctx == NULL) {
    6522           0 :                 SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
    6523           0 :                 rc = -ENOMEM;
    6524           0 :                 goto exit;
    6525             :         }
    6526             : 
    6527           1 :         ctx->name = strdup(name);
    6528           1 :         if (ctx->name == NULL) {
    6529           0 :                 SPDK_ERRLOG("Failed to copy controller name for deletion\n");
    6530           0 :                 rc = -ENOMEM;
    6531           0 :                 goto exit;
    6532             :         }
    6533             : 
    6534           1 :         ctx->delete_done = delete_done;
    6535           1 :         ctx->delete_done_ctx = delete_done_ctx;
    6536           1 :         ctx->path_id = *path_id;
    6537           1 :         ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
    6538           1 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
    6539           1 :         if (ctx->poller == NULL) {
    6540           0 :                 SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
    6541           0 :                 rc = -ENOMEM;
    6542           0 :                 goto exit;
    6543             :         }
    6544             : 
    6545           1 : exit:
                               /* On success the context now belongs to the poller; on failure it is
                                * released here (ctx may still be NULL).
                                */
    6546          48 :         if (rc != 0) {
    6547           1 :                 free_bdev_nvme_delete_ctx(ctx);
    6548             :         }
    6549             : 
    6550          48 :         return rc;
    6551             : }
    6552             : 
    6553             : #define DISCOVERY_INFOLOG(ctx, format, ...) \
    6554             :         SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    6555             : 
    6556             : #define DISCOVERY_ERRLOG(ctx, format, ...) \
    6557             :         SPDK_ERRLOG("Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    6558             : 
                       /*
                        * One entry tracked by a discovery context. Entries are linked through
                        * tailq into the nvm_entry_ctxs or discovery_entry_ctxs list of the owning
                        * struct discovery_ctx.
                        */
    6559             : struct discovery_entry_ctx {
                               /* Controller name; passed to bdev_nvme_delete() when discovery stops. */
    6560             :         char                                            name[128];
                               /* Transport ID of this entry; used as the path to delete on stop. */
    6561             :         struct spdk_nvme_transport_id                   trid;
    6562             :         struct spdk_nvme_ctrlr_opts                     drv_opts;
                               /* NOTE(review): presumably the discovery log page entry this context
                                * was created from - confirm.
                                */
    6563             :         struct spdk_nvmf_discovery_log_page_entry       entry;
    6564             :         TAILQ_ENTRY(discovery_entry_ctx)                tailq;
                               /* Owning discovery context. */
    6565             :         struct discovery_ctx                            *ctx;
    6566             : };
    6567             : 
                       /*
                        * State of one discovery service instance. Instances are linked through
                        * tailq into g_discovery_ctxs. name, hostnqn and log_page are heap buffers
                        * released by free_discovery_ctx().
                        */
    6568             : struct discovery_ctx {
    6569             :         char                                    *name;
    6570             :         spdk_bdev_nvme_start_discovery_fn       start_cb_fn;
    6571             :         spdk_bdev_nvme_stop_discovery_fn        stop_cb_fn;
    6572             :         void                                    *cb_ctx;
    6573             :         struct spdk_nvme_probe_ctx              *probe_ctx;
    6574             :         struct spdk_nvme_detach_ctx             *detach_ctx;
    6575             :         struct spdk_nvme_ctrlr                  *ctrlr;
                               /* Its traddr/trsvcid are what DISCOVERY_INFOLOG/ERRLOG print. */
    6576             :         struct spdk_nvme_transport_id           trid;
    6577             :         struct discovery_entry_ctx              *entry_ctx_in_use;
    6578             :         struct spdk_poller                      *poller;
    6579             :         struct spdk_nvme_ctrlr_opts             drv_opts;
    6580             :         struct nvme_ctrlr_opts                  bdev_opts;
    6581             :         struct spdk_nvmf_discovery_log_page     *log_page;
    6582             :         TAILQ_ENTRY(discovery_ctx)              tailq;
                               /* Entries whose controllers are deleted when discovery is stopped. */
    6583             :         TAILQ_HEAD(, discovery_entry_ctx)       nvm_entry_ctxs;
    6584             :         TAILQ_HEAD(, discovery_entry_ctx)       discovery_entry_ctxs;
    6585             :         int                                     rc;
    6586             :         bool                                    wait_for_attach;
    6587             :         uint64_t                                timeout_ticks;
    6588             :         /* Denotes that the discovery service is being started. We're waiting
    6589             :          * for the initial connection to the discovery controller to be
    6590             :          * established and attach discovered NVM ctrlrs.
    6591             :          */
    6592             :         bool                                    initializing;
    6593             :         /* Denotes if a discovery is currently in progress for this context.
    6594             :          * That includes connecting to newly discovered subsystems.  Used to
    6595             :          * ensure we do not start a new discovery until an existing one is
    6596             :          * complete.
    6597             :          */
    6598             :         bool                                    in_progress;
    6599             : 
    6600             :         /* Denotes if another discovery is needed after the one in progress
    6601             :          * completes.  Set when we receive an AER completion while a discovery
    6602             :          * is already in progress.
    6603             :          */
    6604             :         bool                                    pending;
    6605             : 
    6606             :         /* Signal to the discovery context poller that it should stop the
    6607             :          * discovery service, including detaching from the current discovery
    6608             :          * controller.
    6609             :          */
    6610             :         bool                                    stop;
    6611             : 
    6612             :         struct spdk_thread                      *calling_thread;
    6613             :         uint32_t                                index;
                               /* While this is non-zero, _stop_discovery() re-queues itself instead
                                * of stopping.
                                */
    6614             :         uint32_t                                attach_in_progress;
    6615             :         char                                    *hostnqn;
    6616             : 
    6617             :         /* Denotes if the discovery service was started by the mdns discovery.
    6618             :          */
    6619             :         bool                                    from_mdns_discovery_service;
    6620             : };
    6621             : 
                       /* List head type and the file-wide list of discovery contexts. */
    6622             : TAILQ_HEAD(discovery_ctxs, discovery_ctx);
    6623             : static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
    6624             : 
                       /* Declared early because discovery_complete() calls it before its definition. */
    6625             : static void get_discovery_log_page(struct discovery_ctx *ctx);
    6626             : 
    6627             : static void
    6628           0 : free_discovery_ctx(struct discovery_ctx *ctx)
    6629             : {
    6630           0 :         free(ctx->log_page);
    6631           0 :         free(ctx->hostnqn);
    6632           0 :         free(ctx->name);
    6633           0 :         free(ctx);
    6634           0 : }
    6635             : 
    6636             : static void
    6637           0 : discovery_complete(struct discovery_ctx *ctx)
    6638             : {
    6639           0 :         ctx->initializing = false;
    6640           0 :         ctx->in_progress = false;
    6641           0 :         if (ctx->pending) {
    6642           0 :                 ctx->pending = false;
    6643           0 :                 get_discovery_log_page(ctx);
    6644             :         }
    6645           0 : }
    6646             : 
    6647             : static void
    6648           0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
    6649             :                                struct spdk_nvmf_discovery_log_page_entry *entry)
    6650             : {
    6651             :         char *space;
    6652             : 
    6653           0 :         trid->trtype = entry->trtype;
    6654           0 :         trid->adrfam = entry->adrfam;
    6655           0 :         memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
    6656           0 :         memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
    6657             :         /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
    6658             :          * before call to this function trid->subnqn is zeroed out, we need
    6659             :          * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
    6660             :          * remains 0. Then we can shorten the string (replace ' ' with 0) if required
    6661             :          */
    6662           0 :         memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
    6663             : 
    6664             :         /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
    6665             :          * But the log page entries typically pad them with spaces, not zeroes.
    6666             :          * So add a NULL terminator to each of these fields at the appropriate
    6667             :          * location.
    6668             :          */
    6669           0 :         space = strchr(trid->traddr, ' ');
    6670           0 :         if (space) {
    6671           0 :                 *space = 0;
    6672             :         }
    6673           0 :         space = strchr(trid->trsvcid, ' ');
    6674           0 :         if (space) {
    6675           0 :                 *space = 0;
    6676             :         }
    6677           0 :         space = strchr(trid->subnqn, ' ');
    6678           0 :         if (space) {
    6679           0 :                 *space = 0;
    6680             :         }
    6681           0 : }
    6682             : 
    6683             : static void
    6684           0 : _stop_discovery(void *_ctx)
    6685             : {
    6686           0 :         struct discovery_ctx *ctx = _ctx;
    6687             : 
    6688           0 :         if (ctx->attach_in_progress > 0) {
    6689           0 :                 spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
    6690           0 :                 return;
    6691             :         }
    6692             : 
    6693           0 :         ctx->stop = true;
    6694             : 
    6695           0 :         while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
    6696             :                 struct discovery_entry_ctx *entry_ctx;
    6697           0 :                 struct nvme_path_id path = {};
    6698             : 
    6699           0 :                 entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
    6700           0 :                 path.trid = entry_ctx->trid;
    6701           0 :                 bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    6702           0 :                 TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6703           0 :                 free(entry_ctx);
    6704             :         }
    6705             : 
    6706           0 :         while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
    6707             :                 struct discovery_entry_ctx *entry_ctx;
    6708             : 
    6709           0 :                 entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    6710           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    6711           0 :                 free(entry_ctx);
    6712             :         }
    6713             : 
    6714           0 :         free(ctx->entry_ctx_in_use);
    6715           0 :         ctx->entry_ctx_in_use = NULL;
    6716             : }
    6717             : 
    6718             : static void
    6719           0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    6720             : {
    6721           0 :         ctx->stop_cb_fn = cb_fn;
    6722           0 :         ctx->cb_ctx = cb_ctx;
    6723             : 
    6724           0 :         if (ctx->attach_in_progress > 0) {
    6725           0 :                 DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
    6726             :                                   ctx->attach_in_progress);
    6727             :         }
    6728             : 
    6729           0 :         _stop_discovery(ctx);
    6730           0 : }
    6731             : 
    6732             : static void
    6733           2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
    6734             : {
    6735             :         struct discovery_ctx *d_ctx;
    6736             :         struct nvme_path_id *path_id;
    6737           2 :         struct spdk_nvme_transport_id trid = {};
    6738             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    6739             : 
    6740           2 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    6741             : 
    6742           2 :         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    6743           0 :                 TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
    6744           0 :                         build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
    6745           0 :                         if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
    6746           0 :                                 continue;
    6747             :                         }
    6748             : 
    6749           0 :                         TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6750           0 :                         free(entry_ctx);
    6751           0 :                         DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
    6752             :                                           trid.subnqn, trid.traddr, trid.trsvcid);
    6753             : 
    6754             :                         /* Fail discovery ctrlr to force reattach attempt */
    6755           0 :                         spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
    6756             :                 }
    6757             :         }
    6758           2 : }
    6759             : 
    6760             : static void
    6761           0 : discovery_remove_controllers(struct discovery_ctx *ctx)
    6762             : {
    6763           0 :         struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
    6764             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    6765             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    6766           0 :         struct spdk_nvme_transport_id old_trid = {};
    6767             :         uint64_t numrec, i;
    6768             :         bool found;
    6769             : 
    6770           0 :         numrec = from_le64(&log_page->numrec);
    6771           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
    6772           0 :                 found = false;
    6773           0 :                 old_entry = &entry_ctx->entry;
    6774           0 :                 build_trid_from_log_page_entry(&old_trid, old_entry);
    6775           0 :                 for (i = 0; i < numrec; i++) {
    6776           0 :                         new_entry = &log_page->entries[i];
    6777           0 :                         if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
    6778           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
    6779             :                                                   old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    6780           0 :                                 found = true;
    6781           0 :                                 break;
    6782             :                         }
    6783             :                 }
    6784           0 :                 if (!found) {
    6785           0 :                         struct nvme_path_id path = {};
    6786             : 
    6787           0 :                         DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
    6788             :                                           old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    6789             : 
    6790           0 :                         path.trid = entry_ctx->trid;
    6791           0 :                         bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    6792           0 :                         TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6793           0 :                         free(entry_ctx);
    6794             :                 }
    6795             :         }
    6796           0 :         free(log_page);
    6797           0 :         ctx->log_page = NULL;
    6798           0 :         discovery_complete(ctx);
    6799           0 : }
    6800             : 
    6801             : static void
    6802           0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
    6803             : {
    6804           0 :         ctx->timeout_ticks = 0;
    6805           0 :         ctx->rc = status;
    6806           0 :         if (ctx->start_cb_fn) {
    6807           0 :                 ctx->start_cb_fn(ctx->cb_ctx, status);
    6808           0 :                 ctx->start_cb_fn = NULL;
    6809           0 :                 ctx->cb_ctx = NULL;
    6810             :         }
    6811           0 : }
    6812             : 
    6813             : static void
    6814           0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
    6815             : {
    6816           0 :         struct discovery_entry_ctx *entry_ctx = cb_ctx;
    6817           0 :         struct discovery_ctx *ctx = entry_ctx->ctx;
    6818             : 
    6819           0 :         DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
    6820           0 :         ctx->attach_in_progress--;
    6821           0 :         if (ctx->attach_in_progress == 0) {
    6822           0 :                 complete_discovery_start(ctx, ctx->rc);
    6823           0 :                 if (ctx->initializing && ctx->rc != 0) {
    6824           0 :                         DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
    6825           0 :                         stop_discovery(ctx, NULL, ctx->cb_ctx);
    6826             :                 } else {
    6827           0 :                         discovery_remove_controllers(ctx);
    6828             :                 }
    6829             :         }
    6830           0 : }
    6831             : 
    6832             : static struct discovery_entry_ctx *
    6833           0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
    6834             : {
    6835             :         struct discovery_entry_ctx *new_ctx;
    6836             : 
    6837           0 :         new_ctx = calloc(1, sizeof(*new_ctx));
    6838           0 :         if (new_ctx == NULL) {
    6839           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    6840           0 :                 return NULL;
    6841             :         }
    6842             : 
    6843           0 :         new_ctx->ctx = ctx;
    6844           0 :         memcpy(&new_ctx->trid, trid, sizeof(*trid));
    6845           0 :         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    6846           0 :         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    6847           0 :         return new_ctx;
    6848             : }
    6849             : 
    6850             : static void
    6851           0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
    6852             :                       struct spdk_nvmf_discovery_log_page *log_page)
    6853             : {
    6854           0 :         struct discovery_ctx *ctx = cb_arg;
    6855             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    6856             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    6857             :         uint64_t numrec, i;
    6858             :         bool found;
    6859             : 
    6860           0 :         if (rc || spdk_nvme_cpl_is_error(cpl)) {
    6861           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    6862           0 :                 return;
    6863             :         }
    6864             : 
    6865           0 :         ctx->log_page = log_page;
    6866           0 :         assert(ctx->attach_in_progress == 0);
    6867           0 :         numrec = from_le64(&log_page->numrec);
    6868           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
    6869           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    6870           0 :                 free(entry_ctx);
    6871             :         }
    6872           0 :         for (i = 0; i < numrec; i++) {
    6873           0 :                 found = false;
    6874           0 :                 new_entry = &log_page->entries[i];
    6875           0 :                 if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
    6876           0 :                     new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
    6877             :                         struct discovery_entry_ctx *new_ctx;
    6878           0 :                         struct spdk_nvme_transport_id trid = {};
    6879             : 
    6880           0 :                         build_trid_from_log_page_entry(&trid, new_entry);
    6881           0 :                         new_ctx = create_discovery_entry_ctx(ctx, &trid);
    6882           0 :                         if (new_ctx == NULL) {
    6883           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    6884           0 :                                 break;
    6885             :                         }
    6886             : 
    6887           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
    6888           0 :                         continue;
    6889             :                 }
    6890           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
    6891           0 :                         old_entry = &entry_ctx->entry;
    6892           0 :                         if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
    6893           0 :                                 found = true;
    6894           0 :                                 break;
    6895             :                         }
    6896             :                 }
    6897           0 :                 if (!found) {
    6898           0 :                         struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
    6899             :                         struct discovery_ctx *d_ctx;
    6900             : 
    6901           0 :                         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    6902           0 :                                 TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
    6903           0 :                                         if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
    6904             :                                                     sizeof(new_entry->subnqn))) {
    6905           0 :                                                 break;
    6906             :                                         }
    6907             :                                 }
    6908           0 :                                 if (subnqn_ctx) {
    6909           0 :                                         break;
    6910             :                                 }
    6911             :                         }
    6912             : 
    6913           0 :                         new_ctx = calloc(1, sizeof(*new_ctx));
    6914           0 :                         if (new_ctx == NULL) {
    6915           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    6916           0 :                                 break;
    6917             :                         }
    6918             : 
    6919           0 :                         new_ctx->ctx = ctx;
    6920           0 :                         memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
    6921           0 :                         build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
    6922           0 :                         if (subnqn_ctx) {
    6923           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
    6924           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
    6925             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    6926             :                                                   new_ctx->name);
    6927             :                         } else {
    6928           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
    6929           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
    6930             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    6931             :                                                   new_ctx->name);
    6932             :                         }
    6933           0 :                         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    6934           0 :                         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    6935           0 :                         rc = bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
    6936             :                                               discovery_attach_controller_done, new_ctx,
    6937             :                                               &new_ctx->drv_opts, &ctx->bdev_opts, true);
    6938           0 :                         if (rc == 0) {
    6939           0 :                                 TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
    6940           0 :                                 ctx->attach_in_progress++;
    6941             :                         } else {
    6942           0 :                                 DISCOVERY_ERRLOG(ctx, "bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
    6943             :                         }
    6944             :                 }
    6945             :         }
    6946             : 
    6947           0 :         if (ctx->attach_in_progress == 0) {
    6948           0 :                 discovery_remove_controllers(ctx);
    6949             :         }
    6950             : }
    6951             : 
    6952             : static void
    6953           0 : get_discovery_log_page(struct discovery_ctx *ctx)
    6954             : {
    6955             :         int rc;
    6956             : 
    6957           0 :         assert(ctx->in_progress == false);
    6958           0 :         ctx->in_progress = true;
    6959           0 :         rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
    6960           0 :         if (rc != 0) {
    6961           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    6962             :         }
    6963           0 :         DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
    6964           0 : }
    6965             : 
    6966             : static void
    6967           0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    6968             : {
    6969           0 :         struct discovery_ctx *ctx = arg;
    6970           0 :         uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
    6971             : 
    6972           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    6973           0 :                 DISCOVERY_ERRLOG(ctx, "aer failed\n");
    6974           0 :                 return;
    6975             :         }
    6976             : 
    6977           0 :         if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
    6978           0 :                 DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
    6979           0 :                 return;
    6980             :         }
    6981             : 
    6982           0 :         DISCOVERY_INFOLOG(ctx, "got aer\n");
    6983           0 :         if (ctx->in_progress) {
    6984           0 :                 ctx->pending = true;
    6985           0 :                 return;
    6986             :         }
    6987             : 
    6988           0 :         get_discovery_log_page(ctx);
    6989             : }
    6990             : 
    6991             : static void
    6992           0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6993             :                     struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    6994             : {
    6995           0 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6996             :         struct discovery_ctx *ctx;
    6997             : 
    6998           0 :         ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
    6999             : 
    7000           0 :         DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
    7001           0 :         ctx->probe_ctx = NULL;
    7002           0 :         ctx->ctrlr = ctrlr;
    7003             : 
    7004           0 :         if (ctx->rc != 0) {
    7005           0 :                 DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
    7006             :                                  ctx->rc);
    7007           0 :                 return;
    7008             :         }
    7009             : 
    7010           0 :         spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
    7011             : }
    7012             : 
    7013             : static int
    7014           0 : discovery_poller(void *arg)
    7015             : {
    7016           0 :         struct discovery_ctx *ctx = arg;
    7017             :         struct spdk_nvme_transport_id *trid;
    7018             :         int rc;
    7019             : 
    7020           0 :         if (ctx->detach_ctx) {
    7021           0 :                 rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
    7022           0 :                 if (rc != -EAGAIN) {
    7023           0 :                         ctx->detach_ctx = NULL;
    7024           0 :                         ctx->ctrlr = NULL;
    7025             :                 }
    7026           0 :         } else if (ctx->stop) {
    7027           0 :                 if (ctx->ctrlr != NULL) {
    7028           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    7029           0 :                         if (rc == 0) {
    7030           0 :                                 return SPDK_POLLER_BUSY;
    7031             :                         }
    7032           0 :                         DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    7033             :                 }
    7034           0 :                 spdk_poller_unregister(&ctx->poller);
    7035           0 :                 TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    7036           0 :                 assert(ctx->start_cb_fn == NULL);
    7037           0 :                 if (ctx->stop_cb_fn != NULL) {
    7038           0 :                         ctx->stop_cb_fn(ctx->cb_ctx);
    7039             :                 }
    7040           0 :                 free_discovery_ctx(ctx);
    7041           0 :         } else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
    7042           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7043           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    7044           0 :                         assert(ctx->initializing);
    7045           0 :                         spdk_poller_unregister(&ctx->poller);
    7046           0 :                         TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    7047           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7048           0 :                         stop_discovery(ctx, NULL, NULL);
    7049           0 :                         free_discovery_ctx(ctx);
    7050           0 :                         return SPDK_POLLER_BUSY;
    7051             :                 }
    7052             : 
    7053           0 :                 assert(ctx->entry_ctx_in_use == NULL);
    7054           0 :                 ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    7055           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7056           0 :                 trid = &ctx->entry_ctx_in_use->trid;
    7057           0 :                 ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
    7058           0 :                 if (ctx->probe_ctx) {
    7059           0 :                         spdk_poller_unregister(&ctx->poller);
    7060           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
    7061             :                 } else {
    7062           0 :                         DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
    7063           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7064           0 :                         ctx->entry_ctx_in_use = NULL;
    7065             :                 }
    7066           0 :         } else if (ctx->probe_ctx) {
    7067           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7068           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    7069           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7070           0 :                         return SPDK_POLLER_BUSY;
    7071             :                 }
    7072             : 
    7073           0 :                 rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    7074           0 :                 if (rc != -EAGAIN) {
    7075           0 :                         if (ctx->rc != 0) {
    7076           0 :                                 assert(ctx->initializing);
    7077           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    7078             :                         } else {
    7079           0 :                                 assert(rc == 0);
    7080           0 :                                 DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
    7081           0 :                                 ctx->rc = rc;
    7082           0 :                                 get_discovery_log_page(ctx);
    7083             :                         }
    7084             :                 }
    7085             :         } else {
    7086           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7087           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
    7088           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7089             :                         /* We need to wait until all NVM ctrlrs are attached before we stop the
    7090             :                          * discovery service to make sure we don't detach a ctrlr that is still
    7091             :                          * being attached.
    7092             :                          */
    7093           0 :                         if (ctx->attach_in_progress == 0) {
    7094           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    7095           0 :                                 return SPDK_POLLER_BUSY;
    7096             :                         }
    7097             :                 }
    7098             : 
    7099           0 :                 rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
    7100           0 :                 if (rc < 0) {
    7101           0 :                         spdk_poller_unregister(&ctx->poller);
    7102           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    7103           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7104           0 :                         ctx->entry_ctx_in_use = NULL;
    7105             : 
    7106           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    7107           0 :                         if (rc != 0) {
    7108           0 :                                 DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    7109           0 :                                 ctx->ctrlr = NULL;
    7110             :                         }
    7111             :                 }
    7112             :         }
    7113             : 
    7114           0 :         return SPDK_POLLER_BUSY;
    7115             : }
    7116             : 
    7117             : static void
    7118           0 : start_discovery_poller(void *arg)
    7119             : {
    7120           0 :         struct discovery_ctx *ctx = arg;
    7121             : 
    7122           0 :         TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
    7123           0 :         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    7124           0 : }
    7125             : 
    7126             : int
    7127           0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
    7128             :                           const char *base_name,
    7129             :                           struct spdk_nvme_ctrlr_opts *drv_opts,
    7130             :                           struct nvme_ctrlr_opts *bdev_opts,
    7131             :                           uint64_t attach_timeout,
    7132             :                           bool from_mdns,
    7133             :                           spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
    7134             : {
    7135             :         struct discovery_ctx *ctx;
    7136             :         struct discovery_entry_ctx *discovery_entry_ctx;
    7137             : 
    7138           0 :         snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
    7139           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7140           0 :                 if (strcmp(ctx->name, base_name) == 0) {
    7141           0 :                         return -EEXIST;
    7142             :                 }
    7143             : 
    7144           0 :                 if (ctx->entry_ctx_in_use != NULL) {
    7145           0 :                         if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
    7146           0 :                                 return -EEXIST;
    7147             :                         }
    7148             :                 }
    7149             : 
    7150           0 :                 TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    7151           0 :                         if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
    7152           0 :                                 return -EEXIST;
    7153             :                         }
    7154             :                 }
    7155             :         }
    7156             : 
    7157           0 :         ctx = calloc(1, sizeof(*ctx));
    7158           0 :         if (ctx == NULL) {
    7159           0 :                 return -ENOMEM;
    7160             :         }
    7161             : 
    7162           0 :         ctx->name = strdup(base_name);
    7163           0 :         if (ctx->name == NULL) {
    7164           0 :                 free_discovery_ctx(ctx);
    7165           0 :                 return -ENOMEM;
    7166             :         }
    7167           0 :         memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    7168           0 :         memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    7169           0 :         ctx->from_mdns_discovery_service = from_mdns;
    7170           0 :         ctx->bdev_opts.from_discovery_service = true;
    7171           0 :         ctx->calling_thread = spdk_get_thread();
    7172           0 :         ctx->start_cb_fn = cb_fn;
    7173           0 :         ctx->cb_ctx = cb_ctx;
    7174           0 :         ctx->initializing = true;
    7175           0 :         if (ctx->start_cb_fn) {
    7176             :                 /* We can use this when dumping json to denote if this RPC parameter
    7177             :                  * was specified or not.
    7178             :                  */
    7179           0 :                 ctx->wait_for_attach = true;
    7180             :         }
    7181           0 :         if (attach_timeout != 0) {
    7182           0 :                 ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
    7183           0 :                                      spdk_get_ticks_hz() / 1000ull;
    7184             :         }
    7185           0 :         TAILQ_INIT(&ctx->nvm_entry_ctxs);
    7186           0 :         TAILQ_INIT(&ctx->discovery_entry_ctxs);
    7187           0 :         memcpy(&ctx->trid, trid, sizeof(*trid));
    7188             :         /* Even if user did not specify hostnqn, we can still strdup("\0"); */
    7189           0 :         ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
    7190           0 :         if (ctx->hostnqn == NULL) {
    7191           0 :                 free_discovery_ctx(ctx);
    7192           0 :                 return -ENOMEM;
    7193             :         }
    7194           0 :         discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
    7195           0 :         if (discovery_entry_ctx == NULL) {
    7196           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7197           0 :                 free_discovery_ctx(ctx);
    7198           0 :                 return -ENOMEM;
    7199             :         }
    7200             : 
    7201           0 :         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
    7202           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
    7203           0 :         return 0;
    7204             : }
    7205             : 
    7206             : int
    7207           0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    7208             : {
    7209             :         struct discovery_ctx *ctx;
    7210             : 
    7211           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7212           0 :                 if (strcmp(name, ctx->name) == 0) {
    7213           0 :                         if (ctx->stop) {
    7214           0 :                                 return -EALREADY;
    7215             :                         }
    7216             :                         /* If we're still starting the discovery service and ->rc is non-zero, we're
    7217             :                          * going to stop it as soon as we can
    7218             :                          */
    7219           0 :                         if (ctx->initializing && ctx->rc != 0) {
    7220           0 :                                 return -EALREADY;
    7221             :                         }
    7222           0 :                         stop_discovery(ctx, cb_fn, cb_ctx);
    7223           0 :                         return 0;
    7224             :                 }
    7225             :         }
    7226             : 
    7227           0 :         return -ENOENT;
    7228             : }
    7229             : 
    7230             : static int
    7231           1 : bdev_nvme_library_init(void)
    7232             : {
    7233           1 :         g_bdev_nvme_init_thread = spdk_get_thread();
    7234             : 
    7235           1 :         spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
    7236             :                                 bdev_nvme_destroy_poll_group_cb,
    7237             :                                 sizeof(struct nvme_poll_group),  "nvme_poll_groups");
    7238             : 
    7239           1 :         return 0;
    7240             : }
    7241             : 
    7242             : static void
    7243           1 : bdev_nvme_fini_destruct_ctrlrs(void)
    7244             : {
    7245             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    7246             :         struct nvme_ctrlr *nvme_ctrlr;
    7247             : 
    7248           1 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    7249           1 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    7250           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    7251           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    7252           0 :                         if (nvme_ctrlr->destruct) {
    7253             :                                 /* This controller's destruction was already started
    7254             :                                  * before the application started shutting down
    7255             :                                  */
    7256           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7257           0 :                                 continue;
    7258             :                         }
    7259           0 :                         nvme_ctrlr->destruct = true;
    7260           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7261             : 
    7262           0 :                         spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
    7263             :                                              nvme_ctrlr);
    7264             :                 }
    7265             :         }
    7266             : 
    7267           1 :         g_bdev_nvme_module_finish = true;
    7268           1 :         if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    7269           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7270           1 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
    7271           1 :                 spdk_bdev_module_fini_done();
    7272           1 :                 return;
    7273             :         }
    7274             : 
    7275           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7276             : }
    7277             : 
    7278             : static void
    7279           0 : check_discovery_fini(void *arg)
    7280             : {
    7281           0 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7282           0 :                 bdev_nvme_fini_destruct_ctrlrs();
    7283             :         }
    7284           0 : }
    7285             : 
    7286             : static void
    7287           1 : bdev_nvme_library_fini(void)
    7288             : {
    7289             :         struct nvme_probe_skip_entry *entry, *entry_tmp;
    7290             :         struct discovery_ctx *ctx;
    7291             : 
    7292           1 :         spdk_poller_unregister(&g_hotplug_poller);
    7293           1 :         free(g_hotplug_probe_ctx);
    7294           1 :         g_hotplug_probe_ctx = NULL;
    7295             : 
    7296           1 :         TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
    7297           0 :                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    7298           0 :                 free(entry);
    7299             :         }
    7300             : 
    7301           1 :         assert(spdk_get_thread() == g_bdev_nvme_init_thread);
    7302           1 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7303           1 :                 bdev_nvme_fini_destruct_ctrlrs();
    7304             :         } else {
    7305           0 :                 TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7306           0 :                         stop_discovery(ctx, check_discovery_fini, NULL);
    7307             :                 }
    7308             :         }
    7309           1 : }
    7310             : 
    7311             : static void
    7312           0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
    7313             : {
    7314           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7315           0 :         struct spdk_bdev *bdev = bdev_io->bdev;
    7316           0 :         struct spdk_dif_ctx dif_ctx;
    7317           0 :         struct spdk_dif_error err_blk = {};
    7318             :         int rc;
    7319           0 :         struct spdk_dif_ctx_init_ext_opts dif_opts;
    7320             : 
    7321           0 :         dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
    7322           0 :         dif_opts.dif_pi_format = bdev->dif_pi_format;
    7323           0 :         rc = spdk_dif_ctx_init(&dif_ctx,
    7324           0 :                                bdev->blocklen, bdev->md_len, bdev->md_interleave,
    7325           0 :                                bdev->dif_is_head_of_md, bdev->dif_type,
    7326             :                                bdev_io->u.bdev.dif_check_flags,
    7327           0 :                                bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
    7328           0 :         if (rc != 0) {
    7329           0 :                 SPDK_ERRLOG("Initialization of DIF context failed\n");
    7330           0 :                 return;
    7331             :         }
    7332             : 
    7333           0 :         if (bdev->md_interleave) {
    7334           0 :                 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7335           0 :                                      bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7336             :         } else {
    7337           0 :                 struct iovec md_iov = {
    7338           0 :                         .iov_base       = bdev_io->u.bdev.md_buf,
    7339           0 :                         .iov_len        = bdev_io->u.bdev.num_blocks * bdev->md_len,
    7340             :                 };
    7341             : 
    7342           0 :                 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7343           0 :                                      &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7344             :         }
    7345             : 
    7346           0 :         if (rc != 0) {
    7347           0 :                 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
    7348             :                             err_blk.err_type, err_blk.err_offset);
    7349             :         } else {
    7350           0 :                 SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
    7351             :         }
    7352             : }
    7353             : 
    7354             : static void
    7355           0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7356             : {
    7357           0 :         struct nvme_bdev_io *bio = ref;
    7358             : 
    7359           0 :         if (spdk_nvme_cpl_is_success(cpl)) {
    7360             :                 /* Run PI verification for read data buffer. */
    7361           0 :                 bdev_nvme_verify_pi_error(bio);
    7362             :         }
    7363             : 
    7364             :         /* Return original completion status */
    7365           0 :         bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    7366           0 : }
    7367             : 
    7368             : static void
    7369           3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7370             : {
    7371           3 :         struct nvme_bdev_io *bio = ref;
    7372           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7373             :         int ret;
    7374             : 
    7375           3 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7376           0 :                 SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
    7377             :                             cpl->status.sct, cpl->status.sc);
    7378             : 
    7379             :                 /* Save completion status to use after verifying PI error. */
    7380           0 :                 bio->cpl = *cpl;
    7381             : 
    7382           0 :                 if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
    7383             :                         /* Read without PI checking to verify PI error. */
    7384           0 :                         ret = bdev_nvme_no_pi_readv(bio,
    7385             :                                                     bdev_io->u.bdev.iovs,
    7386             :                                                     bdev_io->u.bdev.iovcnt,
    7387             :                                                     bdev_io->u.bdev.md_buf,
    7388             :                                                     bdev_io->u.bdev.num_blocks,
    7389             :                                                     bdev_io->u.bdev.offset_blocks);
    7390           0 :                         if (ret == 0) {
    7391           0 :                                 return;
    7392             :                         }
    7393             :                 }
    7394             :         }
    7395             : 
    7396           3 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7397             : }
    7398             : 
    7399             : static void
    7400          25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7401             : {
    7402          25 :         struct nvme_bdev_io *bio = ref;
    7403             : 
    7404          25 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7405           0 :                 SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
    7406             :                             cpl->status.sct, cpl->status.sc);
    7407             :                 /* Run PI verification for write data buffer if PI error is detected. */
    7408           0 :                 bdev_nvme_verify_pi_error(bio);
    7409             :         }
    7410             : 
    7411          25 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7412          25 : }
    7413             : 
    7414             : static void
    7415           0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7416             : {
    7417           0 :         struct nvme_bdev_io *bio = ref;
    7418           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7419             : 
    7420             :         /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
    7421             :          * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
    7422             :          */
    7423           0 :         bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
    7424             : 
    7425           0 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7426           0 :                 SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
    7427             :                             cpl->status.sct, cpl->status.sc);
    7428             :                 /* Run PI verification for zone append data buffer if PI error is detected. */
    7429           0 :                 bdev_nvme_verify_pi_error(bio);
    7430             :         }
    7431             : 
    7432           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7433           0 : }
    7434             : 
    7435             : static void
    7436           1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7437             : {
    7438           1 :         struct nvme_bdev_io *bio = ref;
    7439             : 
    7440           1 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7441           0 :                 SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
    7442             :                             cpl->status.sct, cpl->status.sc);
    7443             :                 /* Run PI verification for compare data buffer if PI error is detected. */
    7444           0 :                 bdev_nvme_verify_pi_error(bio);
    7445             :         }
    7446             : 
    7447           1 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7448           1 : }
    7449             : 
    7450             : static void
    7451           4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7452             : {
    7453           4 :         struct nvme_bdev_io *bio = ref;
    7454             : 
    7455             :         /* Compare operation completion */
    7456           4 :         if (!bio->first_fused_completed) {
    7457             :                 /* Save compare result for write callback */
    7458           2 :                 bio->cpl = *cpl;
    7459           2 :                 bio->first_fused_completed = true;
    7460           2 :                 return;
    7461             :         }
    7462             : 
    7463             :         /* Write operation completion */
    7464           2 :         if (spdk_nvme_cpl_is_error(&bio->cpl)) {
    7465             :                 /* If bio->cpl is already an error, it means the compare operation failed.  In that case,
    7466             :                  * complete the IO with the compare operation's status.
    7467             :                  */
    7468           1 :                 if (!spdk_nvme_cpl_is_error(cpl)) {
    7469           1 :                         SPDK_ERRLOG("Unexpected write success after compare failure.\n");
    7470             :                 }
    7471             : 
    7472           1 :                 bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    7473             :         } else {
    7474           1 :                 bdev_nvme_io_complete_nvme_status(bio, cpl);
    7475             :         }
    7476             : }
    7477             : 
    7478             : static void
    7479           1 : bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7480             : {
    7481           1 :         struct nvme_bdev_io *bio = ref;
    7482             : 
    7483           1 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7484           1 : }
    7485             : 
    7486             : static int
    7487           0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
    7488             : {
    7489           0 :         switch (desc->zt) {
    7490           0 :         case SPDK_NVME_ZONE_TYPE_SEQWR:
    7491           0 :                 info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
    7492           0 :                 break;
    7493           0 :         default:
    7494           0 :                 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
    7495           0 :                 return -EIO;
    7496             :         }
    7497             : 
    7498           0 :         switch (desc->zs) {
    7499           0 :         case SPDK_NVME_ZONE_STATE_EMPTY:
    7500           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
    7501           0 :                 break;
    7502           0 :         case SPDK_NVME_ZONE_STATE_IOPEN:
    7503           0 :                 info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
    7504           0 :                 break;
    7505           0 :         case SPDK_NVME_ZONE_STATE_EOPEN:
    7506           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
    7507           0 :                 break;
    7508           0 :         case SPDK_NVME_ZONE_STATE_CLOSED:
    7509           0 :                 info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
    7510           0 :                 break;
    7511           0 :         case SPDK_NVME_ZONE_STATE_RONLY:
    7512           0 :                 info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
    7513           0 :                 break;
    7514           0 :         case SPDK_NVME_ZONE_STATE_FULL:
    7515           0 :                 info->state = SPDK_BDEV_ZONE_STATE_FULL;
    7516           0 :                 break;
    7517           0 :         case SPDK_NVME_ZONE_STATE_OFFLINE:
    7518           0 :                 info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
    7519           0 :                 break;
    7520           0 :         default:
    7521           0 :                 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
    7522           0 :                 return -EIO;
    7523             :         }
    7524             : 
    7525           0 :         info->zone_id = desc->zslba;
    7526           0 :         info->write_pointer = desc->wp;
    7527           0 :         info->capacity = desc->zcap;
    7528             : 
    7529           0 :         return 0;
    7530             : }
    7531             : 
    7532             : static void
    7533           0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7534             : {
    7535           0 :         struct nvme_bdev_io *bio = ref;
    7536           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7537           0 :         uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
    7538           0 :         uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
    7539           0 :         struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
    7540             :         uint64_t max_zones_per_buf, i;
    7541             :         uint32_t zone_report_bufsize;
    7542             :         struct spdk_nvme_ns *ns;
    7543             :         struct spdk_nvme_qpair *qpair;
    7544             :         int ret;
    7545             : 
    7546           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    7547           0 :                 goto out_complete_io_nvme_cpl;
    7548             :         }
    7549             : 
    7550           0 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    7551           0 :                 ret = -ENXIO;
    7552           0 :                 goto out_complete_io_ret;
    7553             :         }
    7554             : 
    7555           0 :         ns = bio->io_path->nvme_ns->ns;
    7556           0 :         qpair = bio->io_path->qpair->qpair;
    7557             : 
    7558           0 :         zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    7559           0 :         max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
    7560             :                             sizeof(bio->zone_report_buf->descs[0]);
    7561             : 
    7562           0 :         if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
    7563           0 :                 ret = -EINVAL;
    7564           0 :                 goto out_complete_io_ret;
    7565             :         }
    7566             : 
    7567           0 :         if (!bio->zone_report_buf->nr_zones) {
    7568           0 :                 ret = -EINVAL;
    7569           0 :                 goto out_complete_io_ret;
    7570             :         }
    7571             : 
    7572           0 :         for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
    7573           0 :                 ret = fill_zone_from_report(&info[bio->handled_zones],
    7574           0 :                                             &bio->zone_report_buf->descs[i]);
    7575           0 :                 if (ret) {
    7576           0 :                         goto out_complete_io_ret;
    7577             :                 }
    7578           0 :                 bio->handled_zones++;
    7579             :         }
    7580             : 
    7581           0 :         if (bio->handled_zones < zones_to_copy) {
    7582           0 :                 uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    7583           0 :                 uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
    7584             : 
    7585           0 :                 memset(bio->zone_report_buf, 0, zone_report_bufsize);
    7586           0 :                 ret = spdk_nvme_zns_report_zones(ns, qpair,
    7587           0 :                                                  bio->zone_report_buf, zone_report_bufsize,
    7588             :                                                  slba, SPDK_NVME_ZRA_LIST_ALL, true,
    7589             :                                                  bdev_nvme_get_zone_info_done, bio);
    7590           0 :                 if (!ret) {
    7591           0 :                         return;
    7592             :                 } else {
    7593           0 :                         goto out_complete_io_ret;
    7594             :                 }
    7595             :         }
    7596             : 
    7597           0 : out_complete_io_nvme_cpl:
    7598           0 :         free(bio->zone_report_buf);
    7599           0 :         bio->zone_report_buf = NULL;
    7600           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7601           0 :         return;
    7602             : 
    7603           0 : out_complete_io_ret:
    7604           0 :         free(bio->zone_report_buf);
    7605           0 :         bio->zone_report_buf = NULL;
    7606           0 :         bdev_nvme_io_complete(bio, ret);
    7607             : }
    7608             : 
    7609             : static void
    7610           0 : bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7611             : {
    7612           0 :         struct nvme_bdev_io *bio = ref;
    7613             : 
    7614           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7615           0 : }
    7616             : 
    7617             : static void
    7618           4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
    7619             : {
    7620           4 :         struct nvme_bdev_io *bio = ctx;
    7621           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7622           4 :         const struct spdk_nvme_cpl *cpl = &bio->cpl;
    7623             : 
    7624           4 :         assert(bdev_nvme_io_type_is_admin(bdev_io->type));
    7625             : 
    7626           4 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    7627           4 : }
    7628             : 
    7629             : static void
    7630           3 : bdev_nvme_abort_complete(void *ctx)
    7631             : {
    7632           3 :         struct nvme_bdev_io *bio = ctx;
    7633           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7634             : 
    7635           3 :         if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
    7636           3 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
    7637             :         } else {
    7638           0 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
    7639             :         }
    7640           3 : }
    7641             : 
    7642             : static void
    7643           3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7644             : {
    7645           3 :         struct nvme_bdev_io *bio = ref;
    7646           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7647             : 
    7648           3 :         bio->cpl = *cpl;
    7649           3 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
    7650           3 : }
    7651             : 
    7652             : static void
    7653           4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7654             : {
    7655           4 :         struct nvme_bdev_io *bio = ref;
    7656           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7657             : 
    7658           4 :         bio->cpl = *cpl;
    7659           4 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
    7660             :                              bdev_nvme_admin_passthru_complete_nvme_status, bio);
    7661           4 : }
    7662             : 
    7663             : static void
    7664           0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
    7665             : {
    7666           0 :         struct nvme_bdev_io *bio = ref;
    7667             :         struct iovec *iov;
    7668             : 
    7669           0 :         bio->iov_offset = sgl_offset;
    7670           0 :         for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
    7671           0 :                 iov = &bio->iovs[bio->iovpos];
    7672           0 :                 if (bio->iov_offset < iov->iov_len) {
    7673           0 :                         break;
    7674             :                 }
    7675             : 
    7676           0 :                 bio->iov_offset -= iov->iov_len;
    7677             :         }
    7678           0 : }
    7679             : 
    7680             : static int
    7681           0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
    7682             : {
    7683           0 :         struct nvme_bdev_io *bio = ref;
    7684             :         struct iovec *iov;
    7685             : 
    7686           0 :         assert(bio->iovpos < bio->iovcnt);
    7687             : 
    7688           0 :         iov = &bio->iovs[bio->iovpos];
    7689             : 
    7690           0 :         *address = iov->iov_base;
    7691           0 :         *length = iov->iov_len;
    7692             : 
    7693           0 :         if (bio->iov_offset) {
    7694           0 :                 assert(bio->iov_offset <= iov->iov_len);
    7695           0 :                 *address += bio->iov_offset;
    7696           0 :                 *length -= bio->iov_offset;
    7697             :         }
    7698             : 
    7699           0 :         bio->iov_offset += *length;
    7700           0 :         if (bio->iov_offset == iov->iov_len) {
    7701           0 :                 bio->iovpos++;
    7702           0 :                 bio->iov_offset = 0;
    7703             :         }
    7704             : 
    7705           0 :         return 0;
    7706             : }
    7707             : 
    7708             : static void
    7709           0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
    7710             : {
    7711           0 :         struct nvme_bdev_io *bio = ref;
    7712             :         struct iovec *iov;
    7713             : 
    7714           0 :         bio->fused_iov_offset = sgl_offset;
    7715           0 :         for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
    7716           0 :                 iov = &bio->fused_iovs[bio->fused_iovpos];
    7717           0 :                 if (bio->fused_iov_offset < iov->iov_len) {
    7718           0 :                         break;
    7719             :                 }
    7720             : 
    7721           0 :                 bio->fused_iov_offset -= iov->iov_len;
    7722             :         }
    7723           0 : }
    7724             : 
    7725             : static int
    7726           0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
    7727             : {
    7728           0 :         struct nvme_bdev_io *bio = ref;
    7729             :         struct iovec *iov;
    7730             : 
    7731           0 :         assert(bio->fused_iovpos < bio->fused_iovcnt);
    7732             : 
    7733           0 :         iov = &bio->fused_iovs[bio->fused_iovpos];
    7734             : 
    7735           0 :         *address = iov->iov_base;
    7736           0 :         *length = iov->iov_len;
    7737             : 
    7738           0 :         if (bio->fused_iov_offset) {
    7739           0 :                 assert(bio->fused_iov_offset <= iov->iov_len);
    7740           0 :                 *address += bio->fused_iov_offset;
    7741           0 :                 *length -= bio->fused_iov_offset;
    7742             :         }
    7743             : 
    7744           0 :         bio->fused_iov_offset += *length;
    7745           0 :         if (bio->fused_iov_offset == iov->iov_len) {
    7746           0 :                 bio->fused_iovpos++;
    7747           0 :                 bio->fused_iov_offset = 0;
    7748             :         }
    7749             : 
    7750           0 :         return 0;
    7751             : }
    7752             : 
    7753             : static int
    7754           0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7755             :                       void *md, uint64_t lba_count, uint64_t lba)
    7756             : {
    7757             :         int rc;
    7758             : 
    7759           0 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
    7760             :                       lba_count, lba);
    7761             : 
    7762           0 :         bio->iovs = iov;
    7763           0 :         bio->iovcnt = iovcnt;
    7764           0 :         bio->iovpos = 0;
    7765           0 :         bio->iov_offset = 0;
    7766             : 
    7767           0 :         rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
    7768           0 :                                             bio->io_path->qpair->qpair,
    7769             :                                             lba, lba_count,
    7770             :                                             bdev_nvme_no_pi_readv_done, bio, 0,
    7771             :                                             bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    7772             :                                             md, 0, 0);
    7773             : 
    7774           0 :         if (rc != 0 && rc != -ENOMEM) {
    7775           0 :                 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
    7776             :         }
    7777           0 :         return rc;
    7778             : }
    7779             : 
    7780             : static int
    7781           3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7782             :                 void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    7783             :                 struct spdk_memory_domain *domain, void *domain_ctx,
    7784             :                 struct spdk_accel_sequence *seq)
    7785             : {
    7786           3 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7787           3 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7788             :         int rc;
    7789             : 
    7790           3 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    7791             :                       lba_count, lba);
    7792             : 
    7793           3 :         bio->iovs = iov;
    7794           3 :         bio->iovcnt = iovcnt;
    7795           3 :         bio->iovpos = 0;
    7796           3 :         bio->iov_offset = 0;
    7797             : 
    7798           3 :         if (domain != NULL || seq != NULL) {
    7799           1 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    7800           1 :                 bio->ext_opts.memory_domain = domain;
    7801           1 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    7802           1 :                 bio->ext_opts.io_flags = flags;
    7803           1 :                 bio->ext_opts.metadata = md;
    7804           1 :                 bio->ext_opts.accel_sequence = seq;
    7805             : 
    7806           1 :                 if (iovcnt == 1) {
    7807           1 :                         rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
    7808             :                                                        bio, &bio->ext_opts);
    7809             :                 } else {
    7810           0 :                         rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
    7811             :                                                         bdev_nvme_readv_done, bio,
    7812             :                                                         bdev_nvme_queued_reset_sgl,
    7813             :                                                         bdev_nvme_queued_next_sge,
    7814             :                                                         &bio->ext_opts);
    7815             :                 }
    7816           2 :         } else if (iovcnt == 1) {
    7817           2 :                 rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
    7818             :                                                    md, lba, lba_count, bdev_nvme_readv_done,
    7819             :                                                    bio, flags, 0, 0);
    7820             :         } else {
    7821           0 :                 rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
    7822             :                                                     bdev_nvme_readv_done, bio, flags,
    7823             :                                                     bdev_nvme_queued_reset_sgl,
    7824             :                                                     bdev_nvme_queued_next_sge, md, 0, 0);
    7825             :         }
    7826             : 
    7827           3 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    7828           0 :                 SPDK_ERRLOG("readv failed: rc = %d\n", rc);
    7829             :         }
    7830           3 :         return rc;
    7831             : }
    7832             : 
    7833             : static int
    7834          25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7835             :                  void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    7836             :                  struct spdk_memory_domain *domain, void *domain_ctx,
    7837             :                  struct spdk_accel_sequence *seq,
    7838             :                  union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13)
    7839             : {
    7840          25 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7841          25 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7842             :         int rc;
    7843             : 
    7844          25 :         SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    7845             :                       lba_count, lba);
    7846             : 
    7847          25 :         bio->iovs = iov;
    7848          25 :         bio->iovcnt = iovcnt;
    7849          25 :         bio->iovpos = 0;
    7850          25 :         bio->iov_offset = 0;
    7851             : 
    7852          25 :         if (domain != NULL || seq != NULL) {
    7853           0 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    7854           0 :                 bio->ext_opts.memory_domain = domain;
    7855           0 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    7856           0 :                 bio->ext_opts.io_flags = flags | SPDK_NVME_IO_FLAGS_DIRECTIVE(cdw12.write.dtype);
    7857           0 :                 bio->ext_opts.cdw13 = cdw13.raw;
    7858           0 :                 bio->ext_opts.metadata = md;
    7859           0 :                 bio->ext_opts.accel_sequence = seq;
    7860             : 
    7861           0 :                 if (iovcnt == 1) {
    7862           0 :                         rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
    7863             :                                                         bio, &bio->ext_opts);
    7864             :                 } else {
    7865           0 :                         rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
    7866             :                                                          bdev_nvme_writev_done, bio,
    7867             :                                                          bdev_nvme_queued_reset_sgl,
    7868             :                                                          bdev_nvme_queued_next_sge,
    7869             :                                                          &bio->ext_opts);
    7870             :                 }
    7871          25 :         } else if (iovcnt == 1) {
    7872          25 :                 rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
    7873             :                                                     md, lba, lba_count, bdev_nvme_writev_done,
    7874             :                                                     bio, flags, 0, 0);
    7875             :         } else {
    7876           0 :                 rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    7877             :                                                      bdev_nvme_writev_done, bio, flags,
    7878             :                                                      bdev_nvme_queued_reset_sgl,
    7879             :                                                      bdev_nvme_queued_next_sge, md, 0, 0);
    7880             :         }
    7881             : 
    7882          25 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    7883           0 :                 SPDK_ERRLOG("writev failed: rc = %d\n", rc);
    7884             :         }
    7885          25 :         return rc;
    7886             : }
    7887             : 
    7888             : static int
    7889           0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7890             :                        void *md, uint64_t lba_count, uint64_t zslba,
    7891             :                        uint32_t flags)
    7892             : {
    7893           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7894           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7895             :         int rc;
    7896             : 
    7897           0 :         SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
    7898             :                       lba_count, zslba);
    7899             : 
    7900           0 :         bio->iovs = iov;
    7901           0 :         bio->iovcnt = iovcnt;
    7902           0 :         bio->iovpos = 0;
    7903           0 :         bio->iov_offset = 0;
    7904             : 
    7905           0 :         if (iovcnt == 1) {
    7906           0 :                 rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
    7907             :                                                        lba_count,
    7908             :                                                        bdev_nvme_zone_appendv_done, bio,
    7909             :                                                        flags,
    7910             :                                                        0, 0);
    7911             :         } else {
    7912           0 :                 rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
    7913             :                                                         bdev_nvme_zone_appendv_done, bio, flags,
    7914             :                                                         bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    7915             :                                                         md, 0, 0);
    7916             :         }
    7917             : 
    7918           0 :         if (rc != 0 && rc != -ENOMEM) {
    7919           0 :                 SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
    7920             :         }
    7921           0 :         return rc;
    7922             : }
    7923             : 
    7924             : static int
    7925           1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7926             :                    void *md, uint64_t lba_count, uint64_t lba,
    7927             :                    uint32_t flags)
    7928             : {
    7929             :         int rc;
    7930             : 
    7931           1 :         SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    7932             :                       lba_count, lba);
    7933             : 
    7934           1 :         bio->iovs = iov;
    7935           1 :         bio->iovcnt = iovcnt;
    7936           1 :         bio->iovpos = 0;
    7937           1 :         bio->iov_offset = 0;
    7938             : 
    7939           1 :         rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
    7940           1 :                                                bio->io_path->qpair->qpair,
    7941             :                                                lba, lba_count,
    7942             :                                                bdev_nvme_comparev_done, bio, flags,
    7943             :                                                bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    7944             :                                                md, 0, 0);
    7945             : 
    7946           1 :         if (rc != 0 && rc != -ENOMEM) {
    7947           0 :                 SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
    7948             :         }
    7949           1 :         return rc;
    7950             : }
    7951             : 
    7952             : static int
    7953           2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
    7954             :                               struct iovec *write_iov, int write_iovcnt,
    7955             :                               void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
    7956             : {
    7957           2 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7958           2 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7959           2 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7960             :         int rc;
    7961             : 
    7962           2 :         SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    7963             :                       lba_count, lba);
    7964             : 
    7965           2 :         bio->iovs = cmp_iov;
    7966           2 :         bio->iovcnt = cmp_iovcnt;
    7967           2 :         bio->iovpos = 0;
    7968           2 :         bio->iov_offset = 0;
    7969           2 :         bio->fused_iovs = write_iov;
    7970           2 :         bio->fused_iovcnt = write_iovcnt;
    7971           2 :         bio->fused_iovpos = 0;
    7972           2 :         bio->fused_iov_offset = 0;
    7973             : 
    7974           2 :         if (bdev_io->num_retries == 0) {
    7975           2 :                 bio->first_fused_submitted = false;
    7976           2 :                 bio->first_fused_completed = false;
    7977             :         }
    7978             : 
    7979           2 :         if (!bio->first_fused_submitted) {
    7980           2 :                 flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    7981           2 :                 memset(&bio->cpl, 0, sizeof(bio->cpl));
    7982             : 
    7983           2 :                 rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
    7984             :                                                        bdev_nvme_comparev_and_writev_done, bio, flags,
    7985             :                                                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
    7986           2 :                 if (rc == 0) {
    7987           2 :                         bio->first_fused_submitted = true;
    7988           2 :                         flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    7989             :                 } else {
    7990           0 :                         if (rc != -ENOMEM) {
    7991           0 :                                 SPDK_ERRLOG("compare failed: rc = %d\n", rc);
    7992             :                         }
    7993           0 :                         return rc;
    7994             :                 }
    7995             :         }
    7996             : 
    7997           2 :         flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
    7998             : 
    7999           2 :         rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    8000             :                                              bdev_nvme_comparev_and_writev_done, bio, flags,
    8001             :                                              bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
    8002           2 :         if (rc != 0 && rc != -ENOMEM) {
    8003           0 :                 SPDK_ERRLOG("write failed: rc = %d\n", rc);
    8004           0 :                 rc = 0;
    8005             :         }
    8006             : 
    8007           2 :         return rc;
    8008             : }
    8009             : 
    8010             : static int
    8011           1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    8012             : {
    8013           1 :         struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
    8014             :         struct spdk_nvme_dsm_range *range;
    8015             :         uint64_t offset, remaining;
    8016             :         uint64_t num_ranges_u64;
    8017             :         uint16_t num_ranges;
    8018             :         int rc;
    8019             : 
    8020           1 :         num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
    8021             :                          SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8022           1 :         if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
    8023           0 :                 SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
    8024           0 :                 return -EINVAL;
    8025             :         }
    8026           1 :         num_ranges = (uint16_t)num_ranges_u64;
    8027             : 
    8028           1 :         offset = offset_blocks;
    8029           1 :         remaining = num_blocks;
    8030           1 :         range = &dsm_ranges[0];
    8031             : 
    8032             :         /* Fill max-size ranges until the remaining blocks fit into one range */
    8033           1 :         while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
    8034           0 :                 range->attributes.raw = 0;
    8035           0 :                 range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8036           0 :                 range->starting_lba = offset;
    8037             : 
    8038           0 :                 offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8039           0 :                 remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8040           0 :                 range++;
    8041             :         }
    8042             : 
    8043             :         /* Final range describes the remaining blocks */
    8044           1 :         range->attributes.raw = 0;
    8045           1 :         range->length = remaining;
    8046           1 :         range->starting_lba = offset;
    8047             : 
    8048           1 :         rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
    8049           1 :                         bio->io_path->qpair->qpair,
    8050             :                         SPDK_NVME_DSM_ATTR_DEALLOCATE,
    8051             :                         dsm_ranges, num_ranges,
    8052             :                         bdev_nvme_queued_done, bio);
    8053             : 
    8054           1 :         return rc;
    8055             : }
    8056             : 
    8057             : static int
    8058           0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    8059             : {
    8060           0 :         if (num_blocks > UINT16_MAX + 1) {
    8061           0 :                 SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
    8062           0 :                 return -EINVAL;
    8063             :         }
    8064             : 
    8065           0 :         return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
    8066           0 :                                              bio->io_path->qpair->qpair,
    8067             :                                              offset_blocks, num_blocks,
    8068             :                                              bdev_nvme_queued_done, bio,
    8069             :                                              0);
    8070             : }
    8071             : 
    8072             : static int
    8073           0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
    8074             :                         struct spdk_bdev_zone_info *info)
    8075             : {
    8076           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8077           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8078           0 :         uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8079           0 :         uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    8080           0 :         uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
    8081             : 
    8082           0 :         if (zone_id % zone_size != 0) {
    8083           0 :                 return -EINVAL;
    8084             :         }
    8085             : 
    8086           0 :         if (num_zones > total_zones || !num_zones) {
    8087           0 :                 return -EINVAL;
    8088             :         }
    8089             : 
    8090           0 :         assert(!bio->zone_report_buf);
    8091           0 :         bio->zone_report_buf = calloc(1, zone_report_bufsize);
    8092           0 :         if (!bio->zone_report_buf) {
    8093           0 :                 return -ENOMEM;
    8094             :         }
    8095             : 
    8096           0 :         bio->handled_zones = 0;
    8097             : 
    8098           0 :         return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
    8099             :                                           zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
    8100             :                                           bdev_nvme_get_zone_info_done, bio);
    8101             : }
    8102             : 
    8103             : static int
    8104           0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
    8105             :                           enum spdk_bdev_zone_action action)
    8106             : {
    8107           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8108           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8109             : 
    8110           0 :         switch (action) {
    8111           0 :         case SPDK_BDEV_ZONE_CLOSE:
    8112           0 :                 return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
    8113             :                                                 bdev_nvme_zone_management_done, bio);
    8114           0 :         case SPDK_BDEV_ZONE_FINISH:
    8115           0 :                 return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
    8116             :                                                  bdev_nvme_zone_management_done, bio);
    8117           0 :         case SPDK_BDEV_ZONE_OPEN:
    8118           0 :                 return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
    8119             :                                                bdev_nvme_zone_management_done, bio);
    8120           0 :         case SPDK_BDEV_ZONE_RESET:
    8121           0 :                 return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
    8122             :                                                 bdev_nvme_zone_management_done, bio);
    8123           0 :         case SPDK_BDEV_ZONE_OFFLINE:
    8124           0 :                 return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
    8125             :                                                   bdev_nvme_zone_management_done, bio);
    8126           0 :         default:
    8127           0 :                 return -EINVAL;
    8128             :         }
    8129             : }
    8130             : 
    8131             : static void
    8132           5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    8133             :                          struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
    8134             : {
    8135             :         struct nvme_io_path *io_path;
    8136             :         struct nvme_ctrlr *nvme_ctrlr;
    8137             :         uint32_t max_xfer_size;
    8138           5 :         int rc = -ENXIO;
    8139             : 
    8140             :         /* Choose the first ctrlr which is not failed. */
    8141           8 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    8142           7 :                 nvme_ctrlr = io_path->qpair->ctrlr;
    8143             : 
    8144             :                 /* We should skip any unavailable nvme_ctrlr rather than checking
    8145             :                  * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
    8146             :                  */
    8147           7 :                 if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    8148           3 :                         continue;
    8149             :                 }
    8150             : 
    8151           4 :                 max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
    8152             : 
    8153           4 :                 if (nbytes > max_xfer_size) {
    8154           0 :                         SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8155           0 :                         rc = -EINVAL;
    8156           0 :                         goto err;
    8157             :                 }
    8158             : 
    8159           4 :                 rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
    8160             :                                                    bdev_nvme_admin_passthru_done, bio);
    8161           4 :                 if (rc == 0) {
    8162           4 :                         return;
    8163             :                 }
    8164             :         }
    8165             : 
    8166           1 : err:
    8167           1 :         bdev_nvme_admin_complete(bio, rc);
    8168             : }
    8169             : 
    8170             : static int
    8171           0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    8172             :                       void *buf, size_t nbytes)
    8173             : {
    8174           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8175           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8176           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8177           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8178             : 
    8179           0 :         if (nbytes > max_xfer_size) {
    8180           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8181           0 :                 return -EINVAL;
    8182             :         }
    8183             : 
    8184             :         /*
    8185             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    8186             :          * so fill it out automatically.
    8187             :          */
    8188           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8189             : 
    8190           0 :         return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
    8191             :                                           (uint32_t)nbytes, bdev_nvme_queued_done, bio);
    8192             : }
    8193             : 
    8194             : static int
    8195           0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    8196             :                          void *buf, size_t nbytes, void *md_buf, size_t md_len)
    8197             : {
    8198           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8199           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8200           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    8201           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8202           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8203             : 
    8204           0 :         if (nbytes > max_xfer_size) {
    8205           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8206           0 :                 return -EINVAL;
    8207             :         }
    8208             : 
    8209           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    8210           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    8211           0 :                 return -EINVAL;
    8212             :         }
    8213             : 
    8214             :         /*
    8215             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    8216             :          * so fill it out automatically.
    8217             :          */
    8218           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8219             : 
    8220           0 :         return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
    8221             :                         (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
    8222             : }
    8223             : 
    8224             : static int
    8225           0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
    8226             :                           struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
    8227             :                           size_t nbytes, void *md_buf, size_t md_len)
    8228             : {
    8229           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8230           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8231           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    8232           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8233           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8234             : 
    8235           0 :         bio->iovs = iov;
    8236           0 :         bio->iovcnt = iovcnt;
    8237           0 :         bio->iovpos = 0;
    8238           0 :         bio->iov_offset = 0;
    8239             : 
    8240           0 :         if (nbytes > max_xfer_size) {
    8241           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8242           0 :                 return -EINVAL;
    8243             :         }
    8244             : 
    8245           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    8246           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    8247           0 :                 return -EINVAL;
    8248             :         }
    8249             : 
    8250             :         /*
    8251             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
    8252             :          * require a nsid, so fill it out automatically.
    8253             :          */
    8254           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8255             : 
    8256           0 :         return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
    8257             :                        ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
    8258             :                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
    8259             : }
    8260             : 
    8261             : static void
    8262           6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    8263             :                 struct nvme_bdev_io *bio_to_abort)
    8264             : {
    8265             :         struct nvme_io_path *io_path;
    8266           6 :         int rc = 0;
    8267             : 
    8268           6 :         rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
    8269           6 :         if (rc == 0) {
    8270           1 :                 bdev_nvme_admin_complete(bio, 0);
    8271           1 :                 return;
    8272             :         }
    8273             : 
    8274           5 :         io_path = bio_to_abort->io_path;
    8275           5 :         if (io_path != NULL) {
    8276           3 :                 rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8277           3 :                                                    io_path->qpair->qpair,
    8278             :                                                    bio_to_abort,
    8279             :                                                    bdev_nvme_abort_done, bio);
    8280             :         } else {
    8281           3 :                 STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    8282           2 :                         rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8283             :                                                            NULL,
    8284             :                                                            bio_to_abort,
    8285             :                                                            bdev_nvme_abort_done, bio);
    8286             : 
    8287           2 :                         if (rc != -ENOENT) {
    8288           1 :                                 break;
    8289             :                         }
    8290             :                 }
    8291             :         }
    8292             : 
    8293           5 :         if (rc != 0) {
    8294             :                 /* If no command was found or there was any error, complete the abort
    8295             :                  * request with failure.
    8296             :                  */
    8297           2 :                 bdev_nvme_admin_complete(bio, rc);
    8298             :         }
    8299             : }
    8300             : 
    8301             : static int
    8302           0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
    8303             :                uint64_t num_blocks)
    8304             : {
    8305           0 :         struct spdk_nvme_scc_source_range range = {
    8306             :                 .slba = src_offset_blocks,
    8307           0 :                 .nlb = num_blocks - 1
    8308             :         };
    8309             : 
    8310           0 :         return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
    8311           0 :                                      bio->io_path->qpair->qpair,
    8312             :                                      &range, 1, dst_offset_blocks,
    8313             :                                      bdev_nvme_queued_done, bio);
    8314             : }
    8315             : 
    8316             : static void
    8317           0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
    8318             : {
    8319             :         const char *action;
    8320             :         uint32_t i;
    8321             : 
    8322           0 :         if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
    8323           0 :                 action = "reset";
    8324           0 :         } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
    8325           0 :                 action = "abort";
    8326             :         } else {
    8327           0 :                 action = "none";
    8328             :         }
    8329             : 
    8330           0 :         spdk_json_write_object_begin(w);
    8331             : 
    8332           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
    8333             : 
    8334           0 :         spdk_json_write_named_object_begin(w, "params");
    8335           0 :         spdk_json_write_named_string(w, "action_on_timeout", action);
    8336           0 :         spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
    8337           0 :         spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
    8338           0 :         spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
    8339           0 :         spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
    8340           0 :         spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
    8341           0 :         spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
    8342           0 :         spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
    8343           0 :         spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
    8344           0 :         spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
    8345           0 :         spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
    8346           0 :         spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
    8347           0 :         spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
    8348           0 :         spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
    8349           0 :         spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
    8350           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
    8351           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
    8352           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
    8353           0 :         spdk_json_write_named_bool(w, "disable_auto_failback", g_opts.disable_auto_failback);
    8354           0 :         spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
    8355           0 :         spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
    8356           0 :         spdk_json_write_named_bool(w, "nvme_error_stat", g_opts.nvme_error_stat);
    8357           0 :         spdk_json_write_named_uint32(w, "rdma_srq_size", g_opts.rdma_srq_size);
    8358           0 :         spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
    8359           0 :         spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
    8360           0 :         spdk_json_write_named_uint32(w, "rdma_max_cq_size", g_opts.rdma_max_cq_size);
    8361           0 :         spdk_json_write_named_uint16(w, "rdma_cm_event_timeout_ms", g_opts.rdma_cm_event_timeout_ms);
    8362           0 :         spdk_json_write_named_array_begin(w, "dhchap_digests");
    8363           0 :         for (i = 0; i < 32; ++i) {
    8364           0 :                 if (g_opts.dhchap_digests & SPDK_BIT(i)) {
    8365           0 :                         spdk_json_write_string(w, spdk_nvme_dhchap_get_digest_name(i));
    8366             :                 }
    8367             :         }
    8368           0 :         spdk_json_write_array_end(w);
    8369           0 :         spdk_json_write_named_array_begin(w, "dhchap_dhgroups");
    8370           0 :         for (i = 0; i < 32; ++i) {
    8371           0 :                 if (g_opts.dhchap_dhgroups & SPDK_BIT(i)) {
    8372           0 :                         spdk_json_write_string(w, spdk_nvme_dhchap_get_dhgroup_name(i));
    8373             :                 }
    8374             :         }
    8375             : 
    8376           0 :         spdk_json_write_array_end(w);
    8377           0 :         spdk_json_write_object_end(w);
    8378             : 
    8379           0 :         spdk_json_write_object_end(w);
    8380           0 : }
    8381             : 
    8382             : static void
    8383           0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
    8384             : {
    8385           0 :         struct spdk_nvme_transport_id trid;
    8386             : 
    8387           0 :         spdk_json_write_object_begin(w);
    8388             : 
    8389           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
    8390             : 
    8391           0 :         spdk_json_write_named_object_begin(w, "params");
    8392           0 :         spdk_json_write_named_string(w, "name", ctx->name);
    8393           0 :         spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
    8394             : 
    8395           0 :         trid = ctx->trid;
    8396           0 :         memset(trid.subnqn, 0, sizeof(trid.subnqn));
    8397           0 :         nvme_bdev_dump_trid_json(&trid, w);
    8398             : 
    8399           0 :         spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
    8400           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
    8401           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
    8402           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8403             :                                      ctx->bdev_opts.fast_io_fail_timeout_sec);
    8404           0 :         spdk_json_write_object_end(w);
    8405             : 
    8406           0 :         spdk_json_write_object_end(w);
    8407           0 : }
    8408             : 
    8409             : #ifdef SPDK_CONFIG_NVME_CUSE
    8410             : static void
    8411           0 : nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
    8412             :                             struct nvme_ctrlr *nvme_ctrlr)
    8413           0 : {
    8414           0 :         size_t cuse_name_size = 128;
    8415           0 :         char cuse_name[cuse_name_size];
    8416             : 
    8417           0 :         if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
    8418             :                                           cuse_name, &cuse_name_size) != 0) {
    8419           0 :                 return;
    8420             :         }
    8421             : 
    8422           0 :         spdk_json_write_object_begin(w);
    8423             : 
    8424           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");
    8425             : 
    8426           0 :         spdk_json_write_named_object_begin(w, "params");
    8427           0 :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8428           0 :         spdk_json_write_object_end(w);
    8429             : 
    8430           0 :         spdk_json_write_object_end(w);
    8431             : }
    8432             : #endif
    8433             : 
    8434             : static void
    8435           0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
    8436             :                        struct nvme_ctrlr *nvme_ctrlr)
    8437             : {
    8438             :         struct spdk_nvme_transport_id   *trid;
    8439             :         const struct spdk_nvme_ctrlr_opts *opts;
    8440             : 
    8441           0 :         if (nvme_ctrlr->opts.from_discovery_service) {
    8442             :                 /* Do not emit an RPC for this - it will be implicitly
    8443             :                  * covered by a separate bdev_nvme_start_discovery or
    8444             :                  * bdev_nvme_start_mdns_discovery RPC.
    8445             :                  */
    8446           0 :                 return;
    8447             :         }
    8448             : 
    8449           0 :         trid = &nvme_ctrlr->active_path_id->trid;
    8450             : 
    8451           0 :         spdk_json_write_object_begin(w);
    8452             : 
    8453           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
    8454             : 
    8455           0 :         spdk_json_write_named_object_begin(w, "params");
    8456           0 :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8457           0 :         nvme_bdev_dump_trid_json(trid, w);
    8458           0 :         spdk_json_write_named_bool(w, "prchk_reftag",
    8459           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
    8460           0 :         spdk_json_write_named_bool(w, "prchk_guard",
    8461           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
    8462           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
    8463           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
    8464           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8465             :                                      nvme_ctrlr->opts.fast_io_fail_timeout_sec);
    8466           0 :         if (nvme_ctrlr->psk != NULL) {
    8467           0 :                 spdk_json_write_named_string(w, "psk", spdk_key_get_name(nvme_ctrlr->psk));
    8468           0 :         } else if (nvme_ctrlr->opts.psk[0] != '\0') {
    8469           0 :                 spdk_json_write_named_string(w, "psk", nvme_ctrlr->opts.psk);
    8470             :         }
    8471             : 
    8472           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    8473           0 :         spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
    8474           0 :         spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
    8475           0 :         spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
    8476           0 :         if (opts->src_addr[0] != '\0') {
    8477           0 :                 spdk_json_write_named_string(w, "hostaddr", opts->src_addr);
    8478             :         }
    8479           0 :         if (opts->src_svcid[0] != '\0') {
    8480           0 :                 spdk_json_write_named_string(w, "hostsvcid", opts->src_svcid);
    8481             :         }
    8482             : 
    8483           0 :         spdk_json_write_object_end(w);
    8484             : 
    8485           0 :         spdk_json_write_object_end(w);
    8486             : }
    8487             : 
    8488             : static void
    8489           0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
    8490             : {
    8491           0 :         spdk_json_write_object_begin(w);
    8492           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
    8493             : 
    8494           0 :         spdk_json_write_named_object_begin(w, "params");
    8495           0 :         spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
    8496           0 :         spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
    8497           0 :         spdk_json_write_object_end(w);
    8498             : 
    8499           0 :         spdk_json_write_object_end(w);
    8500           0 : }
    8501             : 
    8502             : static int
    8503           0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
    8504             : {
    8505             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    8506             :         struct nvme_ctrlr       *nvme_ctrlr;
    8507             :         struct discovery_ctx    *ctx;
    8508             : 
    8509           0 :         bdev_nvme_opts_config_json(w);
    8510             : 
    8511           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    8512             : 
    8513           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    8514           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    8515           0 :                         nvme_ctrlr_config_json(w, nvme_ctrlr);
    8516             : 
    8517             : #ifdef SPDK_CONFIG_NVME_CUSE
    8518           0 :                         nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
    8519             : #endif
    8520             :                 }
    8521             :         }
    8522             : 
    8523           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    8524           0 :                 if (!ctx->from_mdns_discovery_service) {
    8525           0 :                         bdev_nvme_discovery_config_json(w, ctx);
    8526             :                 }
    8527             :         }
    8528             : 
    8529           0 :         bdev_nvme_mdns_discovery_config_json(w);
    8530             : 
    8531             :         /* Dump as last parameter to give all NVMe bdevs chance to be constructed
    8532             :          * before enabling hotplug poller.
    8533             :          */
    8534           0 :         bdev_nvme_hotplug_config_json(w);
    8535             : 
    8536           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    8537           0 :         return 0;
    8538             : }
    8539             : 
    8540             : struct spdk_nvme_ctrlr *
    8541           1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
    8542             : {
    8543             :         struct nvme_bdev *nbdev;
    8544             :         struct nvme_ns *nvme_ns;
    8545             : 
    8546           1 :         if (!bdev || bdev->module != &nvme_if) {
    8547           0 :                 return NULL;
    8548             :         }
    8549             : 
    8550           1 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    8551           1 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    8552           1 :         assert(nvme_ns != NULL);
    8553             : 
    8554           1 :         return nvme_ns->ctrlr->ctrlr;
    8555             : }
    8556             : 
    8557             : static bool
    8558          12 : nvme_io_path_is_current(struct nvme_io_path *io_path)
    8559             : {
    8560             :         const struct nvme_bdev_channel *nbdev_ch;
    8561             :         bool current;
    8562             : 
    8563          12 :         if (!nvme_io_path_is_available(io_path)) {
    8564           4 :                 return false;
    8565             :         }
    8566             : 
    8567           8 :         nbdev_ch = io_path->nbdev_ch;
    8568           8 :         if (nbdev_ch == NULL) {
    8569           1 :                 current = false;
    8570           7 :         } else if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    8571           3 :                 struct nvme_io_path *optimized_io_path = NULL;
    8572             : 
    8573           6 :                 STAILQ_FOREACH(optimized_io_path, &nbdev_ch->io_path_list, stailq) {
    8574           5 :                         if (optimized_io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) {
    8575           2 :                                 break;
    8576             :                         }
    8577             :                 }
    8578             : 
    8579             :                 /* A non-optimized path is only current if there are no optimized paths. */
    8580           3 :                 current = (io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) ||
    8581             :                           (optimized_io_path == NULL);
    8582             :         } else {
    8583           4 :                 if (nbdev_ch->current_io_path) {
    8584           1 :                         current = (io_path == nbdev_ch->current_io_path);
    8585             :                 } else {
    8586             :                         struct nvme_io_path *first_path;
    8587             : 
    8588             :                         /* We arrived here as there are no optimized paths for active-passive
    8589             :                          * mode. Check if this io_path is the first one available on the list.
    8590             :                          */
    8591           3 :                         current = false;
    8592           3 :                         STAILQ_FOREACH(first_path, &nbdev_ch->io_path_list, stailq) {
    8593           3 :                                 if (nvme_io_path_is_available(first_path)) {
    8594           3 :                                         current = (io_path == first_path);
    8595           3 :                                         break;
    8596             :                                 }
    8597             :                         }
    8598             :                 }
    8599             :         }
    8600             : 
    8601           8 :         return current;
    8602             : }
    8603             : 
    8604             : void
    8605           0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
    8606             : {
    8607           0 :         struct nvme_ns *nvme_ns = io_path->nvme_ns;
    8608           0 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    8609             :         const struct spdk_nvme_ctrlr_data *cdata;
    8610             :         const struct spdk_nvme_transport_id *trid;
    8611             :         const char *adrfam_str;
    8612             : 
    8613           0 :         spdk_json_write_object_begin(w);
    8614             : 
    8615           0 :         spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
    8616             : 
    8617           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    8618           0 :         trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
    8619             : 
    8620           0 :         spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
    8621           0 :         spdk_json_write_named_bool(w, "current", nvme_io_path_is_current(io_path));
    8622           0 :         spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
    8623           0 :         spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
    8624             : 
    8625           0 :         spdk_json_write_named_object_begin(w, "transport");
    8626           0 :         spdk_json_write_named_string(w, "trtype", trid->trstring);
    8627           0 :         spdk_json_write_named_string(w, "traddr", trid->traddr);
    8628           0 :         if (trid->trsvcid[0] != '\0') {
    8629           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
    8630             :         }
    8631           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
    8632           0 :         if (adrfam_str) {
    8633           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
    8634             :         }
    8635           0 :         spdk_json_write_object_end(w);
    8636             : 
    8637           0 :         spdk_json_write_object_end(w);
    8638           0 : }
    8639             : 
    8640             : void
    8641           0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
    8642             : {
    8643             :         struct discovery_ctx *ctx;
    8644             :         struct discovery_entry_ctx *entry_ctx;
    8645             : 
    8646           0 :         spdk_json_write_array_begin(w);
    8647           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    8648           0 :                 spdk_json_write_object_begin(w);
    8649           0 :                 spdk_json_write_named_string(w, "name", ctx->name);
    8650             : 
    8651           0 :                 spdk_json_write_named_object_begin(w, "trid");
    8652           0 :                 nvme_bdev_dump_trid_json(&ctx->trid, w);
    8653           0 :                 spdk_json_write_object_end(w);
    8654             : 
    8655           0 :                 spdk_json_write_named_array_begin(w, "referrals");
    8656           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    8657           0 :                         spdk_json_write_object_begin(w);
    8658           0 :                         spdk_json_write_named_object_begin(w, "trid");
    8659           0 :                         nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
    8660           0 :                         spdk_json_write_object_end(w);
    8661           0 :                         spdk_json_write_object_end(w);
    8662             :                 }
    8663           0 :                 spdk_json_write_array_end(w);
    8664             : 
    8665           0 :                 spdk_json_write_object_end(w);
    8666             :         }
    8667           0 :         spdk_json_write_array_end(w);
    8668           0 : }
    8669             : 
    8670           1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
    8671             : 
    8672           1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)
    8673             : {
    8674           0 :         struct spdk_trace_tpoint_opts opts[] = {
    8675             :                 {
    8676             :                         "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
    8677             :                         OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 1,
    8678             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    8679             :                 },
    8680             :                 {
    8681             :                         "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
    8682             :                         OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 0,
    8683             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    8684             :                 }
    8685             :         };
    8686             : 
    8687             : 
    8688           0 :         spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
    8689           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
    8690           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    8691           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    8692           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    8693           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    8694           0 : }

Generated by: LCOV version 1.15