LCOV - code coverage report
Current view: top level - module/bdev/nvme - bdev_nvme.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 2209 4042 54.7 %
Date: 2024-07-15 11:08:19 Functions: 212 303 70.0 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2016 Intel Corporation. All rights reserved.
       3             :  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
       4             :  *   Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  *   Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
       6             :  */
       7             : 
       8             : #include "spdk/stdinc.h"
       9             : 
      10             : #include "bdev_nvme.h"
      11             : 
      12             : #include "spdk/accel.h"
      13             : #include "spdk/config.h"
      14             : #include "spdk/endian.h"
      15             : #include "spdk/bdev.h"
      16             : #include "spdk/json.h"
      17             : #include "spdk/likely.h"
      18             : #include "spdk/nvme.h"
      19             : #include "spdk/nvme_ocssd.h"
      20             : #include "spdk/nvme_zns.h"
      21             : #include "spdk/opal.h"
      22             : #include "spdk/thread.h"
      23             : #include "spdk/trace.h"
      24             : #include "spdk/string.h"
      25             : #include "spdk/util.h"
      26             : #include "spdk/uuid.h"
      27             : 
      28             : #include "spdk/bdev_module.h"
      29             : #include "spdk/log.h"
      30             : 
      31             : #include "spdk_internal/usdt.h"
      32             : #include "spdk_internal/trace_defs.h"
      33             : 
      34             : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
      35             : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
      36             : 
      37             : #define NSID_STR_LEN 10
      38             : 
      39             : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
      40             : /* Per-I/O context of this module; bdev_nvme_get_ctx_size() reports sizeof this struct. */
      41             : struct nvme_bdev_io {
      42             :         /** array of iovecs to transfer. */
      43             :         struct iovec *iovs;
      44             : 
      45             :         /** Number of iovecs in iovs array. */
      46             :         int iovcnt;
      47             : 
      48             :         /** Current iovec position. */
      49             :         int iovpos;
      50             : 
      51             :         /** Offset in current iovec. */
      52             :         uint32_t iov_offset;
      53             : 
      54             :         /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
      55             :          *  being reset in a reset I/O.
      56             :          */
      57             :         struct nvme_io_path *io_path;
      58             : 
      59             :         /** array of iovecs to transfer for the fused command (fused counterpart of iovs). */
      60             :         struct iovec *fused_iovs;
      61             : 
      62             :         /** Number of iovecs in fused_iovs array. */
      63             :         int fused_iovcnt;
      64             : 
      65             :         /** Current iovec position in fused_iovs. */
      66             :         int fused_iovpos;
      67             : 
      68             :         /** Offset in current fused iovec. */
      69             :         uint32_t fused_iov_offset;
      70             : 
      71             :         /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
      72             :         struct spdk_nvme_cpl cpl;
      73             : 
      74             :         /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
      75             :         struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
      76             : 
      77             :         /** Keeps track if first of fused commands was submitted */
      78             :         bool first_fused_submitted;
      79             : 
      80             :         /** Keeps track if first of fused commands was completed */
      81             :         bool first_fused_completed;
      82             : 
      83             :         /** Temporary pointer to zone report buffer */
      84             :         struct spdk_nvme_zns_zone_report *zone_report_buf;
      85             : 
      86             :         /** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
      87             :         uint64_t handled_zones;
      88             : 
      89             :         /** Expiration value in ticks to retry the current I/O. */
      90             :         uint64_t retry_ticks;
      91             : 
      92             :         /** How many times the current I/O was retried. */
      93             :         int32_t retry_count;
      94             : 
      95             :         /** Current tsc at submit time. */
      96             :         uint64_t submit_tsc;
      97             : };
      98             : /* One entry of g_skipped_nvme_ctrlrs: the transport ID of a controller to skip. */
      99             : struct nvme_probe_skip_entry {
     100             :         struct spdk_nvme_transport_id           trid;
     101             :         TAILQ_ENTRY(nvme_probe_skip_entry)      tailq;
     102             : };
     103             : /* Controllers deleted by users via RPC are recorded here so that the hotplug monitor skips them. */
     104             : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
     105             :                         g_skipped_nvme_ctrlrs);
     106             : /* Module-wide bdev_nvme options with their initial values; members not named here start as zero. */
     107             : static struct spdk_bdev_nvme_opts g_opts = {
     108             :         .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
     109             :         .timeout_us = 0,
     110             :         .timeout_admin_us = 0,
     111             :         .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS, /* 10000 per the macro near the top of the file */
     112             :         .transport_retry_count = 4,
     113             :         .arbitration_burst = 0,
     114             :         .low_priority_weight = 0,
     115             :         .medium_priority_weight = 0,
     116             :         .high_priority_weight = 0,
     117             :         .nvme_adminq_poll_period_us = 10000ULL, /* 10 ms */
     118             :         .nvme_ioq_poll_period_us = 0,
     119             :         .io_queue_requests = 0,
     120             :         .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT, /* true per the macro near the top of the file */
     121             :         .bdev_retry_count = 3,
     122             :         .transport_ack_timeout = 0,
     123             :         .ctrlr_loss_timeout_sec = 0,
     124             :         .reconnect_delay_sec = 0,
     125             :         .fast_io_fail_timeout_sec = 0,
     126             :         .disable_auto_failback = false,
     127             :         .generate_uuids = false,
     128             :         .transport_tos = 0,
     129             :         .nvme_error_stat = false,
     130             :         .io_path_stat = false,
     131             :         .allow_accel_sequence = false,
     132             : };
     133             : /* Hotplug poll-period limits and file-scope state (hotplug settings, init thread, hotplug pollers). */
     134             : #define NVME_HOTPLUG_POLL_PERIOD_MAX                    10000000ULL /* 10 s if in microseconds like the default -- confirm at its use site */
     135             : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT                100000ULL /* 100 ms; initial value of g_nvme_hotplug_poll_period_us */
     136             : 
     137             : static int g_hot_insert_nvme_controller_index = 0;
     138             : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
     139             : static bool g_nvme_hotplug_enabled = false; /* hotplug starts disabled */
     140             : struct spdk_thread *g_bdev_nvme_init_thread; /* not static: visible outside this file */
     141             : static struct spdk_poller *g_hotplug_poller;
     142             : static struct spdk_poller *g_hotplug_probe_poller;
     143             : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
     144             : 
     145             : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
     146             :                 struct nvme_async_probe_ctx *ctx);
     147             : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
     148             :                 struct nvme_async_probe_ctx *ctx);
     149             : static int bdev_nvme_library_init(void);
     150             : static void bdev_nvme_library_fini(void);
     151             : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
     152             :                                       struct spdk_bdev_io *bdev_io);
     153             : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
     154             :                                      struct spdk_bdev_io *bdev_io);
     155             : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     156             :                            void *md, uint64_t lba_count, uint64_t lba,
     157             :                            uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     158             :                            struct spdk_accel_sequence *seq);
     159             : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     160             :                                  void *md, uint64_t lba_count, uint64_t lba);
     161             : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     162             :                             void *md, uint64_t lba_count, uint64_t lba,
     163             :                             uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     164             :                             struct spdk_accel_sequence *seq);
     165             : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     166             :                                   void *md, uint64_t lba_count,
     167             :                                   uint64_t zslba, uint32_t flags);
     168             : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     169             :                               void *md, uint64_t lba_count, uint64_t lba,
     170             :                               uint32_t flags);
     171             : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
     172             :                 struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
     173             :                 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
     174             :                 uint32_t flags);
     175             : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
     176             :                                    uint32_t num_zones, struct spdk_bdev_zone_info *info);
     177             : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
     178             :                                      enum spdk_bdev_zone_action action);
     179             : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
     180             :                                      struct nvme_bdev_io *bio,
     181             :                                      struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
     182             : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     183             :                                  void *buf, size_t nbytes);
     184             : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     185             :                                     void *buf, size_t nbytes, void *md_buf, size_t md_len);
     186             : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     187             :                                      struct iovec *iov, int iovcnt, size_t nbytes,
     188             :                                      void *md_buf, size_t md_len);
     189             : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
     190             :                             struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
     191             : static void bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio);
     192             : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     193             : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     194             : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
     195             : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
     196             : 
     197             : static struct nvme_ns *nvme_ns_alloc(void);
     198             : static void nvme_ns_free(struct nvme_ns *ns);
     199             : /* Three-way comparison of two namespaces by id (-1, 0 or 1); the ordering function of nvme_ns_tree. */
     200             : static int
     201         173 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
     202             : {
     203         173 :         return ns1->id < ns2->id ? -1 : ns1->id > ns2->id; /* the relational result supplies the 0 or 1 */
     204             : }
     205             : /* Instantiate the static red-black tree functions for nvme_ns_tree, linked through nvme_ns.node. */
     206         902 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
     207             : /* Return the NVMe qpair reached through the controller-channel context of ctrlr_io_ch. */
     208             : struct spdk_nvme_qpair *
     209           1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
     210             : {
     211             :         struct nvme_ctrlr_channel *ctrlr_ch;
     212             : 
     213           1 :         assert(ctrlr_io_ch != NULL); /* caller contract: a valid channel is required */
     214             : 
     215           1 :         ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
     216             : 
     217           1 :         return ctrlr_ch->qpair->qpair; /* ctrlr_ch->qpair is dereferenced without a NULL check */
     218             : }
     219             : /* Size of the per-I/O context, struct nvme_bdev_io; installed as nvme_if.get_ctx_size. */
     220             : static int
     221           0 : bdev_nvme_get_ctx_size(void)
     222             : {
     223           0 :         return sizeof(struct nvme_bdev_io);
     224             : }
     225             : /* Descriptor of the "nvme" bdev module, handed to SPDK_BDEV_MODULE_REGISTER below. */
     226             : static struct spdk_bdev_module nvme_if = {
     227             :         .name = "nvme",
     228             :         .async_fini = true, /* spdk_bdev_module_fini_done() is called from _nvme_ctrlr_delete() */
     229             :         .module_init = bdev_nvme_library_init,
     230             :         .module_fini = bdev_nvme_library_fini,
     231             :         .config_json = bdev_nvme_config_json,
     232             :         .get_ctx_size = bdev_nvme_get_ctx_size,
     233             : 
     234             : };
     235           1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
     236             : /* Global nvme_bdev_ctrlr list, the mutex several functions in this file hold around it, and the module-finish flag. */
     237             : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
     238             : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
     239             : bool g_bdev_nvme_module_finish; /* tested in _nvme_ctrlr_delete() to decide whether to finish the module */
     240             : /* Find the nvme_bdev_ctrlr named name; NULL if absent. Takes no lock itself (nvme_ctrlr_get_by_name() holds g_bdev_nvme_mutex around the call). */
     241             : struct nvme_bdev_ctrlr *
     242         270 : nvme_bdev_ctrlr_get_by_name(const char *name)
     243             : {
     244             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     245             : 
     246         270 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     247         148 :                 if (strcmp(name, nbdev_ctrlr->name) == 0) { /* name is passed to strcmp unchecked: must not be NULL */
     248         148 :                         break;
     249             :                 }
     250             :         }
     251             : 
     252         270 :         return nbdev_ctrlr; /* NULL when the loop ended without a match */
     253             : }
     254             : /* Find the controller of nbdev_ctrlr whose active path has a transport ID equal to trid; NULL if none. No lock is taken here (nvme_ctrlr_get() calls this under g_bdev_nvme_mutex). */
     255             : static struct nvme_ctrlr *
     256          58 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     257             :                           const struct spdk_nvme_transport_id *trid)
     258             : {
     259             :         struct nvme_ctrlr *nvme_ctrlr;
     260             : 
     261          99 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     262          74 :                 if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0) {
     263          33 :                         break;
     264             :                 }
     265             :         }
     266             : 
     267          58 :         return nvme_ctrlr; /* NULL when the loop ended without a match */
     268             : }
     269             : /* Find the controller of nbdev_ctrlr whose controller data carries the given cntlid; NULL if none. No lock is taken here. */
     270             : struct nvme_ctrlr *
     271           0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     272             :                                 uint16_t cntlid)
     273             : {
     274             :         struct nvme_ctrlr *nvme_ctrlr;
     275             :         const struct spdk_nvme_ctrlr_data *cdata;
     276             : 
     277           0 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     278           0 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
     279           0 :                 if (cdata->cntlid == cntlid) {
     280           0 :                         break;
     281             :                 }
     282             :         }
     283             : 
     284           0 :         return nvme_ctrlr; /* NULL when the loop ended without a match */
     285             : }
     286             : /* Find the nvme_bdev of nbdev_ctrlr with namespace ID nsid; NULL if none. The search runs under g_bdev_nvme_mutex. */
     287             : static struct nvme_bdev *
     288          72 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
     289             : {
     290             :         struct nvme_bdev *bdev;
     291             : 
     292          72 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     293         106 :         TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
     294          68 :                 if (bdev->nsid == nsid) {
     295          34 :                         break;
     296             :                 }
     297             :         }
     298          72 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     299             : 
     300          72 :         return bdev; /* handed back after the mutex has been released */
     301             : }
     302             : /* Look up namespace nsid (asserted to be non-zero) in the controller's namespace tree and return the RB_FIND result. */
     303             : struct nvme_ns *
     304         139 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
     305             : {
     306         139 :         struct nvme_ns ns; /* stack search key; left uninitialized apart from id */
     307             : 
     308         139 :         assert(nsid > 0);
     309             : 
     310         139 :         ns.id = nsid; /* only id is set: nvme_ns_cmp() reads nothing else from the key */
     311         139 :         return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
     312             : }
     313             : /* Return the first namespace in nvme_ns_tree order, i.e. the smallest id per nvme_ns_cmp(); NULL if the tree is empty. */
     314             : struct nvme_ns *
     315         152 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
     316             : {
     317         152 :         return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
     318             : }
     319             : /* Return the namespace after ns in nvme_ns_tree order; NULL if ns is NULL or has no successor. */
     320             : struct nvme_ns *
     321          63 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
     322             : {
     323          63 :         if (ns == NULL) { /* tolerate a NULL cursor instead of passing it to RB_NEXT */
     324           0 :                 return NULL;
     325             :         }
     326             : 
     327          63 :         return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     328             : }
     329             : /* Search every nvme_bdev_ctrlr, under g_bdev_nvme_mutex, for a controller whose active path matches trid; NULL if none. */
     330             : static struct nvme_ctrlr *
     331          51 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid)
     332             : {
     333             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
     334          51 :         struct nvme_ctrlr       *nvme_ctrlr = NULL; /* stays NULL if the global list is empty */
     335             : 
     336          51 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     337          70 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     338          19 :                 nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid);
     339          19 :                 if (nvme_ctrlr != NULL) { /* first match wins */
     340           0 :                         break;
     341             :                 }
     342             :         }
     343          51 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     344             : 
     345          51 :         return nvme_ctrlr;
     346             : }
     347             : /* Return the first controller of the nvme_bdev_ctrlr named name; NULL if name is NULL, no such entry exists, or its list is empty. */
     348             : struct nvme_ctrlr *
     349          71 : nvme_ctrlr_get_by_name(const char *name)
     350             : {
     351             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     352          71 :         struct nvme_ctrlr *nvme_ctrlr = NULL;
     353             : 
     354          71 :         if (name == NULL) { /* checked here because the lookup below passes name to strcmp */
     355           0 :                 return NULL;
     356             :         }
     357             : 
     358          71 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     359          71 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
     360          71 :         if (nbdev_ctrlr != NULL) {
     361          40 :                 nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
     362             :         }
     363          71 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     364             : 
     365          71 :         return nvme_ctrlr;
     366             : }
     367             : /* Invoke fn(nbdev_ctrlr, ctx) on every entry of g_nvme_bdev_ctrlrs; fn runs with g_bdev_nvme_mutex held. */
     368             : void
     369           0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
     370             : {
     371             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     372             : 
     373           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     374           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     375           0 :                 fn(nbdev_ctrlr, ctx); /* ctx is passed through untouched */
     376             :         }
     377           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     378           0 : }
     379             : /* Write trid to w as named JSON strings (trtype, adrfam, traddr, trsvcid, subnqn), omitting any whose text is NULL or empty. */
     380             : void
     381           0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
     382             : {
     383             :         const char *trtype_str;
     384             :         const char *adrfam_str;
     385             : 
     386           0 :         trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
     387           0 :         if (trtype_str) { /* written only when the lookup produced a string */
     388           0 :                 spdk_json_write_named_string(w, "trtype", trtype_str);
     389             :         }
     390             : 
     391           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
     392           0 :         if (adrfam_str) { /* written only when the lookup produced a string */
     393           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
     394             :         }
     395             : 
     396           0 :         if (trid->traddr[0] != '\0') { /* the remaining fields are skipped when empty */
     397           0 :                 spdk_json_write_named_string(w, "traddr", trid->traddr);
     398             :         }
     399             : 
     400           0 :         if (trid->trsvcid[0] != '\0') {
     401           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
     402             :         }
     403             : 
     404           0 :         if (trid->subnqn[0] != '\0') {
     405           0 :                 spdk_json_write_named_string(w, "subnqn", trid->subnqn);
     406             :         }
     407           0 : }
     408             : /* Unlink nvme_ctrlr from nbdev_ctrlr; when no controller is left, also unlink nbdev_ctrlr from the global list and free it. */
     409             : static void
     410          59 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     411             :                        struct nvme_ctrlr *nvme_ctrlr)
     412             : {
     413             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
     414          59 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     415             : 
     416          59 :         TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
     417          59 :         if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) { /* other controllers remain: keep nbdev_ctrlr */
     418          15 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
     419             : 
     420          15 :                 return;
     421             :         }
     422          44 :         TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq); /* unlinked under the mutex, freed after releasing it */
     423             : 
     424          44 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     425             : 
     426          44 :         assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs)); /* no nvme_bdev may remain at this point */
     427             : 
     428          44 :         free(nbdev_ctrlr->name);
     429          44 :         free(nbdev_ctrlr);
     430             : }
     431             : /* Free what nvme_ctrlr owns and then nvme_ctrlr itself; if the module is finishing and the global list is empty, unregister the io_device and report fini done. */
     432             : static void
     433          60 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     434             : {
     435             :         struct nvme_path_id *path_id, *tmp_path;
     436             :         struct nvme_ns *ns, *tmp_ns;
     437             : 
     438          60 :         free(nvme_ctrlr->copied_ana_desc);
     439          60 :         spdk_free(nvme_ctrlr->ana_log_page);
     440             : 
     441          60 :         if (nvme_ctrlr->opal_dev) {
     442           0 :                 spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
     443           0 :                 nvme_ctrlr->opal_dev = NULL;
     444             :         }
     445             : 
     446          60 :         if (nvme_ctrlr->nbdev_ctrlr) {
     447          59 :                 nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr); /* frees nbdev_ctrlr too if this was its last controller */
     448             :         }
     449             : 
     450          60 :         RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) { /* _SAFE form: entries are removed while iterating */
     451           0 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     452           0 :                 nvme_ns_free(ns);
     453             :         }
     454             : 
     455         120 :         TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) { /* _SAFE form: entries are removed while iterating */
     456          60 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
     457          60 :                 free(path_id);
     458             :         }
     459             : 
     460          60 :         pthread_mutex_destroy(&nvme_ctrlr->mutex);
     461             : 
     462          60 :         free(nvme_ctrlr);
     463             : 
     464          60 :         pthread_mutex_lock(&g_bdev_nvme_mutex); /* nvme_ctrlr is gone; only globals are used below */
     465          60 :         if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
     466           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex); /* release the mutex before calling out */
     467           0 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
     468           0 :                 spdk_bdev_module_fini_done();
     469           0 :                 return;
     470             :         }
     471          60 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     472             : }
     473             : /* Poller callback (arg is the nvme_ctrlr): poll the asynchronous detach; on any result other than -EAGAIN stop the poller and delete the controller. Always returns SPDK_POLLER_BUSY. */
     474             : static int
     475          60 : nvme_detach_poller(void *arg)
     476             : {
     477          60 :         struct nvme_ctrlr *nvme_ctrlr = arg;
     478             :         int rc;
     479             : 
     480          60 :         rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
     481          60 :         if (rc != -EAGAIN) { /* -EAGAIN keeps the poller running for another round */
     482          60 :                 spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     483          60 :                 _nvme_ctrlr_delete(nvme_ctrlr); /* frees nvme_ctrlr; it is not touched afterwards */
     484             :         }
     485             : 
     486          60 :         return SPDK_POLLER_BUSY;
     487             : }
     488             : /* Stop the controller's timers, start an asynchronous detach polled by nvme_detach_poller(), and fall back to deleting directly if either step fails. */
     489             : static void
     490          60 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     491             : {
     492             :         int rc;
     493             : 
     494          60 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
     495             : 
     496             :         /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
     497          60 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
     498             : 
     499             :         /* If we got here, the reset/detach poller cannot be active */
     500          60 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
     501          60 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
     502             :                                           nvme_ctrlr, 1000);
     503          60 :         if (nvme_ctrlr->reset_detach_poller == NULL) {
     504           0 :                 SPDK_ERRLOG("Failed to register detach poller\n");
     505           0 :                 goto error;
     506             :         }
     507             : 
     508          60 :         rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx); /* detach_ctx is what nvme_detach_poller() polls */
     509          60 :         if (rc != 0) {
     510           0 :                 SPDK_ERRLOG("Failed to detach the NVMe controller\n");
     511           0 :                 goto error;
     512             :         }
     513             : 
     514          60 :         return; /* success: nvme_detach_poller() finishes the teardown */
     515           0 : error:
     516             :         /* We don't have a good way to handle errors here, so just do what we can and delete the
     517             :          * controller without detaching the underlying NVMe device.
     518             :          */
     519           0 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     520           0 :         _nvme_ctrlr_delete(nvme_ctrlr);
     521             : }
     522             : /* Completion callback passed to spdk_io_device_unregister(): io_device is the nvme_ctrlr, which is handed to nvme_ctrlr_delete(). */
     523             : static void
     524          59 : nvme_ctrlr_unregister_cb(void *io_device)
     525             : {
     526          59 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
     527             : 
     528          59 :         nvme_ctrlr_delete(nvme_ctrlr);
     529          59 : }
     530             : /* Message handler (ctx is the nvme_ctrlr): unregister it as an io_device, with nvme_ctrlr_unregister_cb() as the completion callback. */
     531             : static void
     532          59 : nvme_ctrlr_unregister(void *ctx)
     533             : {
     534          59 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
     535             : 
     536          59 :         spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
     537          59 : }
     538             : /* True only if the controller is marked for destruction, has no references, and no reset, ANA log page update or I/O path cache clearing is in progress. Takes no lock itself (nvme_ctrlr_release() calls it with nvme_ctrlr->mutex held). */
     539             : static bool
     540         220 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
     541             : {
     542         220 :         if (!nvme_ctrlr->destruct) { /* destruction was not requested */
     543         105 :                 return false;
     544             :         }
     545             : 
     546         115 :         if (nvme_ctrlr->ref > 0) { /* still referenced */
     547          56 :                 return false;
     548             :         }
     549             : 
     550          59 :         if (nvme_ctrlr->resetting) {
     551           0 :                 return false;
     552             :         }
     553             : 
     554          59 :         if (nvme_ctrlr->ana_log_page_updating) {
     555           0 :                 return false;
     556             :         }
     557             : 
     558          59 :         if (nvme_ctrlr->io_path_cache_clearing) {
     559           0 :                 return false;
     560             :         }
     561             : 
     562          59 :         return true;
     563             : }
     564             : /* Drop one reference; if the ctrlr can then be unregistered, hand unregistration to nvme_ctrlr->thread. */
     565             : static void
     566         164 : nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
     567             : {
     568         164 :         pthread_mutex_lock(&nvme_ctrlr->mutex); /* ref and the state flags are examined under the mutex */
     569             :         SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
     570             : 
     571         164 :         assert(nvme_ctrlr->ref > 0); /* the caller must own a reference */
     572         164 :         nvme_ctrlr->ref--;
     573             : 
     574         164 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
     575         105 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
     576         105 :                 return;
     577             :         }
     578             : 
     579          59 :         pthread_mutex_unlock(&nvme_ctrlr->mutex); /* the mutex is released before the hand-off below */
     580             : 
     581          59 :         spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr); /* nvme_ctrlr_unregister() gets the ctrlr as ctx */
     582             : }
     583             : /* Forget the channel's cached I/O path and restart its round-robin counter. */
     584             : static void
     585         161 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
     586             : {
     587         161 :         nbdev_ch->current_io_path = NULL; /* the next bdev_nvme_find_io_path() call has to search again */
     588         161 :         nbdev_ch->rr_counter = 0;
     589         161 : }
     590             : /* Find the channel's io_path whose nvme_ns equals the given one; NULL if no entry matches. */
     591             : static struct nvme_io_path *
     592           8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     593             : {
     594             :         struct nvme_io_path *io_path;
     595             : 
     596          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
     597          15 :                 if (io_path->nvme_ns == nvme_ns) {
     598           7 :                         break; /* io_path keeps pointing at the match */
     599             :                 }
     600             :         }
     601             : 
     602           8 :         return io_path; /* NULL when the loop ran to the end of the list */
     603             : }
     604             : /* Allocate a zeroed io_path (plus a reset stat buffer if g_opts.io_path_stat); NULL on allocation failure. */
     605             : static struct nvme_io_path *
     606          35 : nvme_io_path_alloc(void)
     607             : {
     608             :         struct nvme_io_path *io_path;
     609             : 
     610          35 :         io_path = calloc(1, sizeof(*io_path));
     611          35 :         if (io_path == NULL) {
     612           0 :                 SPDK_ERRLOG("Failed to alloc io_path.\n");
     613           0 :                 return NULL;
     614             :         }
     615             : 
     616          35 :         if (g_opts.io_path_stat) { /* the stat buffer is only allocated when this option is set */
     617           0 :                 io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
     618           0 :                 if (io_path->stat == NULL) {
     619           0 :                         free(io_path); /* do not leak the partially built io_path */
     620           0 :                         SPDK_ERRLOG("Failed to alloc io_path stat.\n");
     621           0 :                         return NULL;
     622             :                 }
     623           0 :                 spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
     624             :         }
     625             : 
     626          35 :         return io_path;
     627             : }
     628             : /* Free an io_path together with its optional stat buffer. */
     629             : static void
     630          35 : nvme_io_path_free(struct nvme_io_path *io_path)
     631             : {
     632          35 :         free(io_path->stat); /* stays NULL when nvme_io_path_alloc() did not allocate it; free(NULL) is a no-op */
     633          35 :         free(io_path);
     634          35 : }
     635             : /* Create an io_path for nvme_ns and link it into the qpair's and the channel's path lists; 0 or -ENOMEM. */
     636             : static int
     637          35 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     638             : {
     639             :         struct nvme_io_path *io_path;
     640             :         struct spdk_io_channel *ch;
     641             :         struct nvme_ctrlr_channel *ctrlr_ch;
     642             :         struct nvme_qpair *nvme_qpair;
     643             : 
     644          35 :         io_path = nvme_io_path_alloc();
     645          35 :         if (io_path == NULL) {
     646           0 :                 return -ENOMEM;
     647             :         }
     648             : 
     649          35 :         io_path->nvme_ns = nvme_ns;
     650             : 
     651          35 :         ch = spdk_get_io_channel(nvme_ns->ctrlr); /* the channel is requested for nvme_ns->ctrlr as io_device */
     652          35 :         if (ch == NULL) {
     653           0 :                 nvme_io_path_free(io_path);
     654           0 :                 SPDK_ERRLOG("Failed to alloc io_channel.\n");
     655           0 :                 return -ENOMEM;
     656             :         }
     657             : 
     658          35 :         ctrlr_ch = spdk_io_channel_get_ctx(ch);
     659             : 
     660          35 :         nvme_qpair = ctrlr_ch->qpair;
     661          35 :         assert(nvme_qpair != NULL);
     662             : 
     663          35 :         io_path->qpair = nvme_qpair;
     664          35 :         TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq); /* qpair-side list, linked through tailq */
     665             : 
     666          35 :         io_path->nbdev_ch = nbdev_ch;
     667          35 :         STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq); /* channel-side list, linked through stailq */
     668             : 
     669          35 :         bdev_nvme_clear_current_io_path(nbdev_ch); /* the path set changed, so drop the cached path */
     670             : 
     671          35 :         return 0;
     672             : }
     673             : /* Clear the io_path pointer of every queued retry I/O that still refers to io_path. */
     674             : static void
     675          35 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
     676             :                               struct nvme_io_path *io_path)
     677             : {
     678             :         struct spdk_bdev_io *bdev_io;
     679             :         struct nvme_bdev_io *bio;
     680             : 
     681          36 :         TAILQ_FOREACH(bdev_io, &nbdev_ch->retry_io_list, module_link) {
     682           1 :                 bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
     683           1 :                 if (bio->io_path == io_path) {
     684           1 :                         bio->io_path = NULL; /* bdev_nvme_retry_io() checks for NULL before reusing the path */
     685             :                 }
     686             :         }
     687          35 : }
     688             : /* Unlink io_path from the channel and put its ctrlr channel; the io_path memory is not freed here. */
     689             : static void
     690          35 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
     691             : {
     692             :         struct spdk_io_channel *ch;
     693             :         struct nvme_qpair *nvme_qpair;
     694             :         struct nvme_ctrlr_channel *ctrlr_ch;
     695             :         struct nvme_bdev *nbdev;
     696             : 
     697          35 :         nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch)); /* the nvme_bdev is the io_device of this channel */
     698             : 
     699             :         /* Add the statistics to nvme_ns before this path is destroyed. */
     700          35 :         pthread_mutex_lock(&nbdev->mutex);
     701          35 :         if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
     702           0 :                 spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
     703             :         }
     704          35 :         pthread_mutex_unlock(&nbdev->mutex);
     705             : 
     706          35 :         bdev_nvme_clear_current_io_path(nbdev_ch); /* the cached path may be the one being removed */
     707          35 :         bdev_nvme_clear_retry_io_path(nbdev_ch, io_path); /* queued retries must not keep pointing at it */
     708             : 
     709          35 :         STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
     710          35 :         io_path->nbdev_ch = NULL;
     711             : 
     712          35 :         nvme_qpair = io_path->qpair;
     713          35 :         assert(nvme_qpair != NULL);
     714             : 
     715          35 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
     716          35 :         assert(ctrlr_ch != NULL);
     717             : 
     718          35 :         ch = spdk_io_channel_from_ctx(ctrlr_ch);
     719          35 :         spdk_put_io_channel(ch); /* NOTE(review): presumably the channel taken in _bdev_nvme_add_io_path() - confirm */
     720             : 
     721             :         /* After an io_path is removed, I/Os submitted to it may complete and update statistics
     722             :          * of the io_path. To avoid heap-use-after-free error from this case, do not free the
     723             :          * io_path here but free the io_path when the associated qpair is freed. It is ensured
     724             :          * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
     725             :          */
     726          35 : }
     727             : /* Delete every io_path that is linked into the channel's io_path_list. */
     728             : static void
     729          22 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
     730             : {
     731             :         struct nvme_io_path *io_path, *tmp_io_path;
     732             : 
     733          55 :         STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) { /* _SAFE: the entry is unlinked in the body */
     734          33 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
     735             :         }
     736          22 : }
     737             : /* Initialize a nvme_bdev_channel: copy the bdev's multipath settings and add one io_path per namespace. */
     738             : static int
     739          22 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
     740             : {
     741          22 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf;
     742          22 :         struct nvme_bdev *nbdev = io_device;
     743             :         struct nvme_ns *nvme_ns;
     744             :         int rc;
     745             : 
     746          22 :         STAILQ_INIT(&nbdev_ch->io_path_list);
     747          22 :         TAILQ_INIT(&nbdev_ch->retry_io_list);
     748             : 
     749          22 :         pthread_mutex_lock(&nbdev->mutex); /* held while reading the settings and walking nvme_ns_list */
     750             : 
     751          22 :         nbdev_ch->mp_policy = nbdev->mp_policy;
     752          22 :         nbdev_ch->mp_selector = nbdev->mp_selector;
     753          22 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
     754             : 
     755          55 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
     756          33 :                 rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
     757          33 :                 if (rc != 0) {
     758           0 :                         pthread_mutex_unlock(&nbdev->mutex); /* unlock first: _bdev_nvme_delete_io_path() locks nbdev->mutex */
     759             : 
     760           0 :                         _bdev_nvme_delete_io_paths(nbdev_ch); /* roll back the paths added so far */
     761           0 :                         return rc;
     762             :                 }
     763             :         }
     764          22 :         pthread_mutex_unlock(&nbdev->mutex);
     765             : 
     766          22 :         return 0; /* otherwise the error from _bdev_nvme_add_io_path() was returned above */
     767             : }
     768             : 
     769             : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
     770             :  * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
     771             :  */
     772             : static inline void
     773          47 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
     774             :                         const struct spdk_nvme_cpl *cpl)
     775             : {
     776          47 :         spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
     777             :                           (uintptr_t)bdev_io); /* traced before the bdev_io is completed */
     778          47 :         if (cpl) {
     779          29 :                 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); /* 'status' is ignored here */
     780             :         } else {
     781          18 :                 spdk_bdev_io_complete(bdev_io, status);
     782             :         }
     783          47 : }
     784             : 
     785             : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
     786             : /* Tear down a nvme_bdev_channel: abort the queued retry I/Os, then delete all of its io_paths. */
     787             : static void
     788          22 : bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
     789             : {
     790          22 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf; /* io_device is not used */
     791             : 
     792          22 :         bdev_nvme_abort_retry_ios(nbdev_ch);
     793          22 :         _bdev_nvme_delete_io_paths(nbdev_ch);
     794          22 : }
     795             : /* Return true for the RESET, NVME_ADMIN and ABORT I/O types, false for every other type. */
     796             : static inline bool
     797          58 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
     798             : {
     799          58 :         switch (io_type) {
     800           5 :         case SPDK_BDEV_IO_TYPE_RESET:
     801             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
     802             :         case SPDK_BDEV_IO_TYPE_ABORT:
     803           5 :                 return true;
     804          53 :         default:
     805          53 :                 break;
     806             :         }
     807             : 
     808          53 :         return false;
     809             : }
     810             : /* Return false while ana_state_updating is set or when nvme_ns->ns is NULL; true otherwise. */
     811             : static inline bool
     812          77 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
     813             : {
     814          77 :         if (spdk_unlikely(nvme_ns->ana_state_updating)) {
     815           1 :                 return false;
     816             :         }
     817             : 
     818          76 :         if (spdk_unlikely(nvme_ns->ns == NULL)) {
     819           0 :                 return false;
     820             :         }
     821             : 
     822          76 :         return true;
     823             : }
     824             : /* Return true if the namespace is active and its ANA state is optimized or non-optimized. */
     825             : static inline bool
     826           8 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
     827             : {
     828           8 :         if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
     829           0 :                 return false;
     830             :         }
     831             : 
     832           8 :         switch (nvme_ns->ana_state) {
     833           8 :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
     834             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
     835           8 :                 return true;
     836           0 :         default: /* every other ANA state counts as not accessible */
     837           0 :                 break;
     838             :         }
     839             : 
     840           0 :         return false;
     841             : }
     842             : /* Return true if the qpair exists, reports no failure reason, and its ctrlr_ch has no reset_iter set. */
     843             : static inline bool
     844         102 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
     845             : {
     846         102 :         if (spdk_unlikely(nvme_qpair->qpair == NULL)) { /* no underlying qpair at the moment */
     847          20 :                 return false;
     848             :         }
     849             : 
     850          82 :         if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
     851             :                           SPDK_NVME_QPAIR_FAILURE_NONE)) {
     852           0 :                 return false;
     853             :         }
     854             : 
     855          82 :         if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) { /* NOTE(review): presumably set while a reset visits this channel - confirm */
     856           0 :                 return false;
     857             :         }
     858             : 
     859          82 :         return true;
     860             : }
     861             : /* Return true if the path's qpair is connected and its namespace is accessible. */
     862             : static inline bool
     863           8 : nvme_io_path_is_available(struct nvme_io_path *io_path)
     864             : {
     865           8 :         if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
     866           0 :                 return false;
     867             :         }
     868             : 
     869           8 :         if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
     870           0 :                 return false;
     871             :         }
     872             : 
     873           8 :         return true;
     874             : }
     875             : /* Decide whether the ctrlr is to be treated as failed; the first matching check below wins. */
     876             : static inline bool
     877           8 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
     878             : {
     879           8 :         if (nvme_ctrlr->destruct) { /* a ctrlr marked for destruction is reported as failed */
     880           0 :                 return true;
     881             :         }
     882             : 
     883           8 :         if (nvme_ctrlr->fast_io_fail_timedout) {
     884           2 :                 return true;
     885             :         }
     886             : 
     887           6 :         if (nvme_ctrlr->resetting) {
     888           4 :                 if (nvme_ctrlr->opts.reconnect_delay_sec != 0) { /* resetting with a reconnect delay configured: not failed */
     889           4 :                         return false;
     890             :                 } else {
     891           0 :                         return true;
     892             :                 }
     893             :         }
     894             : 
     895           2 :         if (nvme_ctrlr->reconnect_is_delayed) { /* checked before 'disabled' and the library's failed state */
     896           2 :                 return false;
     897             :         }
     898             : 
     899           0 :         if (nvme_ctrlr->disabled) {
     900           0 :                 return true;
     901             :         }
     902             : 
     903           0 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) { /* otherwise use the state reported for the underlying ctrlr */
     904           0 :                 return true;
     905             :         } else {
     906           0 :                 return false;
     907             :         }
     908             : }
     909             : /* Return true only if the ctrlr is not destructing, failed, resetting, waiting to reconnect, or disabled. */
     910             : static bool
     911          20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
     912             : {
     913          20 :         if (nvme_ctrlr->destruct) {
     914           0 :                 return false;
     915             :         }
     916             : 
     917          20 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
     918           3 :                 return false;
     919             :         }
     920             : 
     921          17 :         if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) { /* unlike nvme_ctrlr_is_failed(), both count as unavailable */
     922           1 :                 return false;
     923             :         }
     924             : 
     925          16 :         if (nvme_ctrlr->disabled) {
     926           0 :                 return false;
     927             :         }
     928             : 
     929          16 :         return true;
     930             : }
     931             : 
     932             : /* Simulate circular linked list. */
     933             : static inline struct nvme_io_path *
     934          87 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
     935             : {
     936             :         struct nvme_io_path *next_path;
     937             : 
     938          87 :         if (prev_path != NULL) {
     939          37 :                 next_path = STAILQ_NEXT(prev_path, stailq);
     940          37 :                 if (next_path != NULL) {
     941          14 :                         return next_path;
     942             :                 }
     943             :         }
     944             : 
     945          73 :         return STAILQ_FIRST(&nbdev_ch->io_path_list); /* no prev_path or no successor: wrap to the head (NULL if the list is empty) */
     946             : }
     947             : /* Scan the paths circularly from the one after current_io_path; return the first optimized path, else the first non-optimized one, else NULL. */
     948             : static struct nvme_io_path *
     949          57 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
     950             : {
     951          57 :         struct nvme_io_path *io_path, *start, *non_optimized = NULL;
     952             : 
     953          57 :         start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
     954             : 
     955          57 :         io_path = start; /* NOTE(review): start is NULL if io_path_list is empty and is dereferenced below - confirm callers exclude that */
     956             :         do {
     957          69 :                 if (spdk_likely(nvme_qpair_is_connected(io_path->qpair) &&
     958             :                                 nvme_ns_is_active(io_path->nvme_ns))) {
     959          56 :                         switch (io_path->nvme_ns->ana_state) {
     960          39 :                         case SPDK_NVME_ANA_OPTIMIZED_STATE:
     961          39 :                                 nbdev_ch->current_io_path = io_path; /* an optimized path is cached for either mp_policy */
     962          39 :                                 return io_path;
     963          10 :                         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
     964          10 :                                 if (non_optimized == NULL) { /* remember only the first non-optimized candidate */
     965           7 :                                         non_optimized = io_path;
     966             :                                 }
     967          10 :                                 break;
     968           7 :                         default:
     969           7 :                                 break;
     970             :                         }
     971          13 :                 }
     972          30 :                 io_path = nvme_io_path_get_next(nbdev_ch, io_path);
     973          30 :         } while (io_path != start); /* stop after one full lap around the list */
     974             : 
     975          18 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
     976             :                 /* We come here only if there is no optimized path. Cache even non_optimized
     977             :                  * path for load balance across multiple non_optimized paths.
     978             :                  */
     979           1 :                 nbdev_ch->current_io_path = non_optimized;
     980             :         }
     981             : 
     982          18 :         return non_optimized; /* may be NULL when no usable path was found */
     983             : }
     984             : /* Pick the connected, active path with the fewest outstanding requests, preferring optimized over non-optimized; NULL if none. */
     985             : static struct nvme_io_path *
     986           4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
     987             : {
     988             :         struct nvme_io_path *io_path;
     989           4 :         struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
     990           4 :         uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
     991             :         uint32_t num_outstanding_reqs;
     992             : 
     993          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
     994          12 :                 if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
     995             :                         /* The device is currently resetting. */
     996           0 :                         continue;
     997             :                 }
     998             : 
     999          12 :                 if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
    1000           0 :                         continue;
    1001             :                 }
    1002             : 
    1003          12 :                 num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
    1004          12 :                 switch (io_path->nvme_ns->ana_state) {
    1005           6 :                 case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1006           6 :                         if (num_outstanding_reqs < opt_min_qd) { /* strict '<': on a tie the earlier path in the list is kept */
    1007           5 :                                 opt_min_qd = num_outstanding_reqs;
    1008           5 :                                 optimized = io_path;
    1009             :                         }
    1010           6 :                         break;
    1011           3 :                 case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1012           3 :                         if (num_outstanding_reqs < non_opt_min_qd) {
    1013           3 :                                 non_opt_min_qd = num_outstanding_reqs;
    1014           3 :                                 non_optimized = io_path;
    1015             :                         }
    1016           3 :                         break;
    1017           3 :                 default: /* paths in any other ANA state are never selected */
    1018           3 :                         break;
    1019             :                 }
    1020             :         }
    1021             : 
    1022             :         /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
    1023           4 :         if (optimized != NULL) {
    1024           3 :                 return optimized;
    1025             :         }
    1026             : 
    1027           1 :         return non_optimized;
    1028             : }
    1029             : /* Return the io_path for the next I/O, using the cached path, mp_policy and mp_selector of the channel. */
    1030             : static inline struct nvme_io_path *
    1031          95 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
    1032             : {
    1033          95 :         if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
    1034          41 :                 if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
    1035          31 :                         return nbdev_ch->current_io_path; /* active-passive keeps using the cached path */
    1036          10 :                 } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1037          10 :                         if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) { /* stay on the cached path until rr_min_io calls are counted */
    1038           3 :                                 return nbdev_ch->current_io_path;
    1039             :                         }
    1040           7 :                         nbdev_ch->rr_counter = 0;
    1041             :                 }
    1042             :         }
    1043             : 
    1044          61 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
    1045          14 :             nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1046          57 :                 return _bdev_nvme_find_io_path(nbdev_ch);
    1047             :         } else {
    1048           4 :                 return _bdev_nvme_find_io_path_min_qd(nbdev_ch); /* active-active with any selector other than round-robin */
    1049             :         }
    1050             : }
    1051             : 
    1052             : /* Return true if there is any io_path whose qpair is active or ctrlr is not failed,
    1053             :  * or false otherwise.
    1054             :  *
    1055             :  * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
    1056             :  * is likely to be non-accessible now but may become accessible.
    1057             :  *
    1058             :  * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
    1059             :  * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
    1060             :  * when starting to reset it but it is set to failed when the reset failed. Hence, if
    1061             :  * a ctrlr is unfailed, it is likely that it works fine or is resetting.
    1062             :  */
    1063             : static bool
    1064          13 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
    1065             : {
    1066             :         struct nvme_io_path *io_path;
    1067             : 
    1068          15 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    1069          13 :                 if (io_path->nvme_ns->ana_transition_timedout) { /* such a path is not counted as a candidate */
    1070           0 :                         continue;
    1071             :                 }
    1072             : 
    1073          13 :                 if (nvme_qpair_is_connected(io_path->qpair) ||
    1074           8 :                     !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
    1075          11 :                         return true; /* one candidate is enough */
    1076             :                 }
    1077             :         }
    1078             : 
    1079           2 :         return false;
    1080             : }
    1081             : /* Resubmit a retry I/O: reuse its io_path if that is still available, else call bdev_nvme_submit_request(). */
    1082             : static void
    1083          14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    1084             : {
    1085          14 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    1086             :         struct spdk_io_channel *ch;
    1087             : 
    1088          14 :         if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) { /* NULL if bdev_nvme_clear_retry_io_path() cleared it */
    1089           3 :                 _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    1090             :         } else {
    1091          11 :                 ch = spdk_io_channel_from_ctx(nbdev_ch);
    1092          11 :                 bdev_nvme_submit_request(ch, bdev_io); /* NOTE(review): presumably selects an io_path again; defined outside this view */
    1093             :         }
    1094          14 : }
    1095             : /* Poller: resubmit the queued retry I/Os whose retry_ticks has been reached, then re-arm for the next one. */
    1096             : static int
    1097          14 : bdev_nvme_retry_ios(void *arg)
    1098             : {
    1099          14 :         struct nvme_bdev_channel *nbdev_ch = arg;
    1100             :         struct spdk_bdev_io *bdev_io, *tmp_bdev_io;
    1101             :         struct nvme_bdev_io *bio;
    1102             :         uint64_t now, delay_us;
    1103             : 
    1104          14 :         now = spdk_get_ticks();
    1105             : 
    1106          28 :         TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_bdev_io) {
    1107          15 :                 bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    1108          15 :                 if (bio->retry_ticks > now) {
    1109           1 :                         break; /* bdev_nvme_queue_retry_io() keeps the list sorted, so later entries are not due either */
    1110             :                 }
    1111             : 
    1112          14 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link); /* unlinked before it is resubmitted */
    1113             : 
    1114          14 :                 bdev_nvme_retry_io(nbdev_ch, bdev_io);
    1115             :         }
    1116             : 
    1117          14 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller); /* dropped here; re-registered below if I/Os remain */
    1118             : 
    1119          14 :         bdev_io = TAILQ_FIRST(&nbdev_ch->retry_io_list);
    1120          14 :         if (bdev_io != NULL) {
    1121           4 :                 bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    1122             : 
    1123           4 :                 delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz(); /* ticks until the head is due, in microseconds */
    1124             : 
    1125           4 :                 nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1126             :                                             delay_us);
    1127             :         }
    1128             : 
    1129          14 :         return SPDK_POLLER_BUSY; /* returned unconditionally, even if nothing was resubmitted */
    1130             : }
    1131             : /* Queue bio for a retry after delay_ms, keeping retry_io_list sorted by ascending retry_ticks. */
    1132             : static void
    1133          15 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1134             :                          struct nvme_bdev_io *bio, uint64_t delay_ms)
    1135             : {
    1136          15 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1137             :         struct spdk_bdev_io *tmp_bdev_io;
    1138             :         struct nvme_bdev_io *tmp_bio;
    1139             : 
    1140          15 :         bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL; /* deadline in ticks; delay_ms is milliseconds */
    1141             : 
    1142          15 :         TAILQ_FOREACH_REVERSE(tmp_bdev_io, &nbdev_ch->retry_io_list, retry_io_head, module_link) { /* scan from the tail */
    1143           1 :                 tmp_bio = (struct nvme_bdev_io *)tmp_bdev_io->driver_ctx;
    1144             : 
    1145           1 :                 if (tmp_bio->retry_ticks <= bio->retry_ticks) { /* '<=' places the new I/O after entries with an equal deadline */
    1146           1 :                         TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bdev_io, bdev_io,
    1147             :                                            module_link);
    1148           1 :                         return; /* the head is unchanged, so the poller is left as it is */
    1149             :                 }
    1150             :         }
    1151             : 
    1152             :         /* No earlier I/Os were found. This I/O must be the new head. */
    1153          14 :         TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bdev_io, module_link);
    1154             : 
    1155          14 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller); /* replace the poller so it matches the new head */
    1156             : 
    1157          14 :         nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1158             :                                     delay_ms * 1000ULL); /* delay_ms converted to microseconds */
    1159             : }
    1160             : 
    /*               : Abort every I/O waiting on the channel's retry list.
     *               :
     *               : Each queued I/O is removed from retry_io_list and completed with
     *               : SPDK_BDEV_IO_STATUS_ABORTED; afterwards the retry poller is unregistered.
     */
    1161             : static void
    1162          30 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
    1163             : {
    1164             :         struct spdk_bdev_io *bdev_io, *tmp_io;
    1165             : 
    /*               :         The _SAFE variant is used because entries are removed while iterating. */
    1166          30 :         TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_io) {
    1167           0 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
    1168           0 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1169             :         }
    1170             : 
    1171          30 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1172          30 : }
    1173             : 
    /*               : Abort one specific I/O if it is waiting on the channel's retry list.
     *               :
     *               : The list is searched for the bdev_io whose driver_ctx is bio_to_abort.
     *               : When found, it is removed from the list and completed with
     *               : SPDK_BDEV_IO_STATUS_ABORTED.
     *               :
     *               : Returns 0 if the I/O was found and aborted, -ENOENT otherwise.
     */
    1174             : static int
    1175           6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1176             :                          struct nvme_bdev_io *bio_to_abort)
    1177             : {
    1178             :         struct spdk_bdev_io *bdev_io_to_abort;
    1179             : 
    1180           6 :         TAILQ_FOREACH(bdev_io_to_abort, &nbdev_ch->retry_io_list, module_link) {
    1181           1 :                 if ((struct nvme_bdev_io *)bdev_io_to_abort->driver_ctx == bio_to_abort) {
    /*               :                         Removing inside a plain TAILQ_FOREACH is fine here
     *               :                         because the function returns right after. */
    1182           1 :                         TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io_to_abort, module_link);
    1183           1 :                         __bdev_nvme_io_complete(bdev_io_to_abort, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1184           1 :                         return 0;
    1185             :                 }
    1186             :         }
    1187             : 
    1188           5 :         return -ENOENT;
    1189             : }
    1190             : 
    /*               : Record an NVMe error completion in the bdev's error statistics.
     *               :
     *               : cpl must be an error completion (asserted). Nothing is recorded when
     *               : nbdev->err_stat is NULL. Otherwise, with nbdev->mutex held, the counter
     *               : for the status code type is incremented, and for the four status code
     *               : types listed in the switch the per-(type, code) counter is incremented
     *               : as well.
     */
    1191             : static void
    1192          12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
    1193             : {
    1194             :         struct nvme_bdev *nbdev;
    1195             :         uint16_t sct, sc;
    1196             : 
    1197          12 :         assert(spdk_nvme_cpl_is_error(cpl));
    1198             : 
    1199          12 :         nbdev = bdev_io->bdev->ctxt;
    1200             : 
    1201          12 :         if (nbdev->err_stat == NULL) {
    1202          12 :                 return;
    1203             :         }
    1204             : 
    1205           0 :         sct = cpl->status.sct;
    1206           0 :         sc = cpl->status.sc;
    1207             : 
    1208           0 :         pthread_mutex_lock(&nbdev->mutex);
    1209             : 
    1210           0 :         nbdev->err_stat->status_type[sct]++;
    /*               :         Per-code counters are kept only for the status code types below. */
    1211           0 :         switch (sct) {
    1212           0 :         case SPDK_NVME_SCT_GENERIC:
    1213             :         case SPDK_NVME_SCT_COMMAND_SPECIFIC:
    1214             :         case SPDK_NVME_SCT_MEDIA_ERROR:
    1215             :         case SPDK_NVME_SCT_PATH:
    1216           0 :                 nbdev->err_stat->status[sct][sc]++;
    1217           0 :                 break;
    1218           0 :         default:
    1219           0 :                 break;
    1220             :         }
    1221             : 
    1222           0 :         pthread_mutex_unlock(&nbdev->mutex);
    1223             : }
    1224             : 
    /*               : Accumulate statistics for the I/O path that served a completed I/O.
     *               :
     *               : Nothing is recorded when bio->io_path->stat is NULL. Otherwise the
     *               : latency is taken as the current tick count minus bio->submit_tsc, and,
     *               : depending on the I/O type, the byte count (num_blocks * blocklen), the
     *               : operation count, the accumulated latency and the min/max latency of the
     *               : matching category are updated:
     *               :   READ  -> read counters      WRITE -> write counters
     *               :   UNMAP -> unmap counters     COPY  -> copy counters
     *               :   ZCOPY -> counted only in the start phase; as a read when populate is
     *               :            set, as a write otherwise
     *               : Other I/O types are not counted.
     */
    1225             : static inline void
    1226          20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
    1227             : {
    1228          20 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1229          20 :         uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
    1230          20 :         uint32_t blocklen = bdev_io->bdev->blocklen;
    1231             :         struct spdk_bdev_io_stat *stat;
    1232             :         uint64_t tsc_diff;
    1233             : 
    1234          20 :         if (bio->io_path->stat == NULL) {
    1235          20 :                 return;
    1236             :         }
    1237             : 
    1238           0 :         tsc_diff = spdk_get_ticks() - bio->submit_tsc;
    1239           0 :         stat = bio->io_path->stat;
    1240             : 
    1241           0 :         switch (bdev_io->type) {
    1242           0 :         case SPDK_BDEV_IO_TYPE_READ:
    1243           0 :                 stat->bytes_read += num_blocks * blocklen;
    1244           0 :                 stat->num_read_ops++;
    1245           0 :                 stat->read_latency_ticks += tsc_diff;
    1246           0 :                 if (stat->max_read_latency_ticks < tsc_diff) {
    1247           0 :                         stat->max_read_latency_ticks = tsc_diff;
    1248             :                 }
    1249           0 :                 if (stat->min_read_latency_ticks > tsc_diff) {
    1250           0 :                         stat->min_read_latency_ticks = tsc_diff;
    1251             :                 }
    1252           0 :                 break;
    1253           0 :         case SPDK_BDEV_IO_TYPE_WRITE:
    1254           0 :                 stat->bytes_written += num_blocks * blocklen;
    1255           0 :                 stat->num_write_ops++;
    1256           0 :                 stat->write_latency_ticks += tsc_diff;
    1257           0 :                 if (stat->max_write_latency_ticks < tsc_diff) {
    1258           0 :                         stat->max_write_latency_ticks = tsc_diff;
    1259             :                 }
    1260           0 :                 if (stat->min_write_latency_ticks > tsc_diff) {
    1261           0 :                         stat->min_write_latency_ticks = tsc_diff;
    1262             :                 }
    1263           0 :                 break;
    1264           0 :         case SPDK_BDEV_IO_TYPE_UNMAP:
    1265           0 :                 stat->bytes_unmapped += num_blocks * blocklen;
    1266           0 :                 stat->num_unmap_ops++;
    1267           0 :                 stat->unmap_latency_ticks += tsc_diff;
    1268           0 :                 if (stat->max_unmap_latency_ticks < tsc_diff) {
    1269           0 :                         stat->max_unmap_latency_ticks = tsc_diff;
    1270             :                 }
    1271           0 :                 if (stat->min_unmap_latency_ticks > tsc_diff) {
    1272           0 :                         stat->min_unmap_latency_ticks = tsc_diff;
    1273             :                 }
    1274           0 :                 break;
    1275           0 :         case SPDK_BDEV_IO_TYPE_ZCOPY:
    1276             :                 /* Track the data in the start phase only */
    1277           0 :                 if (!bdev_io->u.bdev.zcopy.start) {
    1278           0 :                         break;
    1279             :                 }
    /*               :                 A populating zcopy start is accounted as a read, any
     *               :                 other zcopy start as a write. */
    1280           0 :                 if (bdev_io->u.bdev.zcopy.populate) {
    1281           0 :                         stat->bytes_read += num_blocks * blocklen;
    1282           0 :                         stat->num_read_ops++;
    1283           0 :                         stat->read_latency_ticks += tsc_diff;
    1284           0 :                         if (stat->max_read_latency_ticks < tsc_diff) {
    1285           0 :                                 stat->max_read_latency_ticks = tsc_diff;
    1286             :                         }
    1287           0 :                         if (stat->min_read_latency_ticks > tsc_diff) {
    1288           0 :                                 stat->min_read_latency_ticks = tsc_diff;
    1289             :                         }
    1290             :                 } else {
    1291           0 :                         stat->bytes_written += num_blocks * blocklen;
    1292           0 :                         stat->num_write_ops++;
    1293           0 :                         stat->write_latency_ticks += tsc_diff;
    1294           0 :                         if (stat->max_write_latency_ticks < tsc_diff) {
    1295           0 :                                 stat->max_write_latency_ticks = tsc_diff;
    1296             :                         }
    1297           0 :                         if (stat->min_write_latency_ticks > tsc_diff) {
    1298           0 :                                 stat->min_write_latency_ticks = tsc_diff;
    1299             :                         }
    1300             :                 }
    1301           0 :                 break;
    1302           0 :         case SPDK_BDEV_IO_TYPE_COPY:
    1303           0 :                 stat->bytes_copied += num_blocks * blocklen;
    1304           0 :                 stat->num_copy_ops++;
    1305           0 :                 stat->copy_latency_ticks += tsc_diff;
    1306           0 :                 if (stat->max_copy_latency_ticks < tsc_diff) {
    1307           0 :                         stat->max_copy_latency_ticks = tsc_diff;
    1308             :                 }
    1309           0 :                 if (stat->min_copy_latency_ticks > tsc_diff) {
    1310           0 :                         stat->min_copy_latency_ticks = tsc_diff;
    1311             :                 }
    1312           0 :                 break;
    1313           0 :         default:
    1314           0 :                 break;
    1315             :         }
    1316             : }
    1317             : 
    /*               : Decide whether an I/O that completed with an error should be retried
     *               : and, if so, after what delay.
     *               :
     *               : Path-related failure (path error, aborted by SQ deletion, I/O path not
     *               : available, or controller not available): the channel's cached current
     *               : I/O path is cleared and bio->io_path is reset to NULL. For an ANA error
     *               : an ANA log page read is requested and, if that request returns 0, the
     *               : namespace is flagged as ana_state_updating. The I/O is retried with no
     *               : delay unless no I/O path may become available, in which case false is
     *               : returned and *_delay_ms is left unwritten.
     *               :
     *               : Any other error: bio->retry_count is incremented and the delay is taken
     *               : from the controller's crdt[] entry selected by the completion's CRD
     *               : field, multiplied by 100, or 0 when CRD is 0.
     *               :
     *               : Returns true when the I/O should be queued for retry (with *_delay_ms
     *               : set), false when it should be completed instead.
     */
    1318             : static bool
    1319           7 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
    1320             :                          const struct spdk_nvme_cpl *cpl,
    1321             :                          struct nvme_bdev_channel *nbdev_ch,
    1322             :                          uint64_t *_delay_ms)
    1323             : {
    1324           7 :         struct nvme_io_path *io_path = bio->io_path;
    1325           7 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    1326             :         const struct spdk_nvme_ctrlr_data *cdata;
    1327             : 
    1328           7 :         if (spdk_nvme_cpl_is_path_error(cpl) ||
    1329           5 :             spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
    1330           4 :             !nvme_io_path_is_available(io_path) ||
    1331           4 :             !nvme_ctrlr_is_available(nvme_ctrlr)) {
    1332           3 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    1333           3 :                 bio->io_path = NULL;
    /*               :                 The local io_path copy is still used below although
     *               :                 bio->io_path has just been cleared. */
    1334           3 :                 if (spdk_nvme_cpl_is_ana_error(cpl)) {
    1335           1 :                         if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
    1336           1 :                                 io_path->nvme_ns->ana_state_updating = true;
    1337             :                         }
    1338             :                 }
    1339           3 :                 if (!any_io_path_may_become_available(nbdev_ch)) {
    1340           0 :                         return false;
    1341             :                 }
    1342           3 :                 *_delay_ms = 0;
    1343             :         } else {
    1344           4 :                 bio->retry_count++;
    1345             : 
    1346           4 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    1347             : 
    /*               :                 NOTE(review): crdt[] is indexed directly by the non-zero
     *               :                 CRD value; confirm against the declaration of crdt[] that
     *               :                 the largest CRD value is a valid index and selects the
     *               :                 intended entry. The factor 100 presumably converts the
     *               :                 controller's units to milliseconds - TODO confirm. */
    1348           4 :                 if (cpl->status.crd != 0) {
    1349           1 :                         *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
    1350             :                 } else {
    1351           3 :                         *_delay_ms = 0;
    1352             :                 }
    1353             :         }
    1354             : 
    1355           7 :         return true;
    1356             : }
    1357             : 
    /*               : Finish a non-admin I/O according to its NVMe completion, retrying it
     *               : when appropriate.
     *               :
     *               : On success the I/O path statistics are updated and the I/O is completed.
     *               : On error the error statistics are updated first; the I/O is then
     *               : completed without retry when DNR is set, when it was aborted by request,
     *               : when the retry limit g_opts.bdev_retry_count (-1 disables the limit
     *               : check) has been reached, or when an accel sequence is attached.
     *               : Otherwise bdev_nvme_check_retry_io() decides: if it returns true the I/O
     *               : is queued for retry and this function returns without completing it.
     *               :
     *               : On the completion path retry_count, submit_tsc and the accel_sequence
     *               : pointer are reset before __bdev_nvme_io_complete() is called with cpl.
     */
    1358             : static inline void
    1359          32 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
    1360             :                                   const struct spdk_nvme_cpl *cpl)
    1361             : {
    1362          32 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1363             :         struct nvme_bdev_channel *nbdev_ch;
    /*               :         delay_ms is only read after bdev_nvme_check_retry_io() returned
     *               :         true, which is the case in which that function writes it. */
    1364          32 :         uint64_t delay_ms;
    1365             : 
    1366          32 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1367             : 
    1368          32 :         if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
    1369          20 :                 bdev_nvme_update_io_path_stat(bio);
    1370          20 :                 goto complete;
    1371             :         }
    1372             : 
    1373             :         /* Update error counts before deciding if retry is needed.
    1374             :          * Hence, error counts may be more than the number of I/O errors.
    1375             :          */
    1376          12 :         bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
    1377             : 
    1378          12 :         if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
    1379           8 :             (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
    1380           5 :                 goto complete;
    1381             :         }
    1382             : 
    1383             :         /* At this point we don't know whether the sequence was successfully executed or not, so we
    1384             :          * cannot retry the IO */
    1385           7 :         if (bdev_io->u.bdev.accel_sequence != NULL) {
    1386           0 :                 goto complete;
    1387             :         }
    1388             : 
    1389           7 :         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1390             : 
    1391           7 :         if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
    1392           7 :                 bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
    1393           7 :                 return;
    1394             :         }
    1395             : 
    1396          25 : complete:
    1397          25 :         bio->retry_count = 0;
    1398          25 :         bio->submit_tsc = 0;
    1399          25 :         bdev_io->u.bdev.accel_sequence = NULL;
    /*               :         A status argument of 0 is passed together with cpl; presumably the
     *               :         helper derives the final bdev status from cpl - TODO confirm. */
    1400          25 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    1401             : }
    1402             : 
    /*               : Finish a non-admin I/O according to an errno-style result code,
     *               : retrying it when appropriate.
     *               :
     *               : rc == 0       -> SPDK_BDEV_IO_STATUS_SUCCESS
     *               : rc == -ENOMEM -> SPDK_BDEV_IO_STATUS_NOMEM
     *               : rc == -ENXIO  -> if the retry limit g_opts.bdev_retry_count has not
     *               :                  been reached (or is -1), the channel's cached current
     *               :                  I/O path is cleared and bio->io_path reset; if some
     *               :                  I/O path may become available the I/O is queued for
     *               :                  retry after 1000 ms and the function returns without
     *               :                  completing it. Otherwise it is handled like any other
     *               :                  error.
     *               : other rc      -> the accel sequence is aborted and detached, and the
     *               :                  I/O fails with SPDK_BDEV_IO_STATUS_FAILED.
     *               :
     *               : When the I/O is completed, retry_count and submit_tsc are reset first.
     */
    1403             : static inline void
    1404          11 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
    1405             : {
    1406          11 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1407             :         struct nvme_bdev_channel *nbdev_ch;
    1408             :         enum spdk_bdev_io_status io_status;
    1409             : 
    1410          11 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1411             : 
    1412          11 :         switch (rc) {
    1413           1 :         case 0:
    1414           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1415           1 :                 break;
    1416           0 :         case -ENOMEM:
    1417           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1418           0 :                 break;
    1419          10 :         case -ENXIO:
    1420          10 :                 if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
    1421          10 :                         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1422             : 
    1423          10 :                         bdev_nvme_clear_current_io_path(nbdev_ch);
    1424          10 :                         bio->io_path = NULL;
    1425             : 
    1426          10 :                         if (any_io_path_may_become_available(nbdev_ch)) {
    1427           8 :                                 bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
    1428           8 :                                 return;
    1429             :                         }
    1430             :                 }
    1431             : 
    /*               :         Not retried: -ENXIO is then treated as a plain failure. */
    1432             :         /* fallthrough */
    1433             :         default:
    1434           2 :                 spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
    1435           2 :                 bdev_io->u.bdev.accel_sequence = NULL;
    1436           2 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1437           2 :                 break;
    1438             :         }
    1439             : 
    1440           3 :         bio->retry_count = 0;
    1441           3 :         bio->submit_tsc = 0;
    1442           3 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1443             : }
    1444             : 
    /*               : Complete an admin-path I/O according to an errno-style result code.
     *               :
     *               : rc == 0 maps to SPDK_BDEV_IO_STATUS_SUCCESS, -ENOMEM to
     *               : SPDK_BDEV_IO_STATUS_NOMEM, and every other value (including -ENXIO) to
     *               : SPDK_BDEV_IO_STATUS_FAILED. No retry is attempted in this function.
     */
    1445             : static inline void
    1446           4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
    1447             : {
    1448           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1449             :         enum spdk_bdev_io_status io_status;
    1450             : 
    1451           4 :         switch (rc) {
    1452           1 :         case 0:
    1453           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1454           1 :                 break;
    1455           0 :         case -ENOMEM:
    1456           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1457           0 :                 break;
    1458           3 :         case -ENXIO:
    1459             :         /* fallthrough */
    1460             :         default:
    1461           3 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1462           3 :                 break;
    1463             :         }
    1464             : 
    1465           4 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1466           4 : }
    1467             : 
    /*               : Completion callback of the channel iteration started by
     *               : bdev_nvme_clear_io_path_caches().
     *               :
     *               : With the controller mutex held, the io_path_cache_clearing flag (which
     *               : must be set, asserted) is cleared. If the controller can now be
     *               : unregistered, the mutex is released and the controller is unregistered;
     *               : otherwise the function just releases the mutex and returns.
     *               : The status argument is not used.
     */
    1468             : static void
    1469           3 : bdev_nvme_clear_io_path_caches_done(struct spdk_io_channel_iter *i, int status)
    1470             : {
    1471           3 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    1472             : 
    1473           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1474             : 
    1475           3 :         assert(nvme_ctrlr->io_path_cache_clearing == true);
    1476           3 :         nvme_ctrlr->io_path_cache_clearing = false;
    1477             : 
    1478           3 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    1479           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1480           3 :                 return;
    1481             :         }
    1482             : 
    /*               :         The mutex is dropped before nvme_ctrlr_unregister() is called. */
    1483           0 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1484             : 
    1485           0 :         nvme_ctrlr_unregister(nvme_ctrlr);
    1486             : }
    1487             : 
    /*               : Clear the cached current I/O path of every bdev channel that has an
     *               : I/O path on the given nvme_qpair.
     *               :
     *               : I/O paths whose nbdev_ch is NULL are skipped.
     */
    1488             : static void
    1489         320 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
    1490             : {
    1491             :         struct nvme_io_path *io_path;
    1492             : 
    1493         459 :         TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
    1494         139 :                 if (io_path->nbdev_ch == NULL) {
    1495          64 :                         continue;
    1496             :                 }
    1497          75 :                 bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
    1498             :         }
    1499         320 : }
    1500             : 
    /*               : Per-channel step of the I/O path cache clearing iteration.
     *               :
     *               : Obtains the nvme_ctrlr_channel of the channel currently being visited,
     *               : clears the I/O path caches associated with its qpair (which must be
     *               : non-NULL, asserted) and continues the iteration with status 0.
     */
    1501             : static void
    1502           1 : bdev_nvme_clear_io_path_cache(struct spdk_io_channel_iter *i)
    1503             : {
    1504           1 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    1505           1 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
    1506             : 
    1507           1 :         assert(ctrlr_ch->qpair != NULL);
    1508             : 
    1509           1 :         _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
    1510             : 
    1511           1 :         spdk_for_each_channel_continue(i, 0);
    1512           1 : }
    1513             : 
    /*               : Start clearing the I/O path caches on all channels of a controller.
     *               :
     *               : With the controller mutex held, the request is dropped if the
     *               : controller is not available or if a clearing pass is already in
     *               : progress (io_path_cache_clearing set). Otherwise the flag is set, the
     *               : mutex is released, and a channel iteration is started that runs
     *               : bdev_nvme_clear_io_path_cache() per channel and
     *               : bdev_nvme_clear_io_path_caches_done() at the end.
     */
    1514             : static void
    1515           3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
    1516             : {
    1517           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1518           3 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    1519             :             nvme_ctrlr->io_path_cache_clearing) {
    1520           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1521           0 :                 return;
    1522             :         }
    1523             : 
    /*               :         The flag stays set until the iteration's done callback clears it. */
    1524           3 :         nvme_ctrlr->io_path_cache_clearing = true;
    1525           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1526             : 
    1527           3 :         spdk_for_each_channel(nvme_ctrlr,
    1528             :                               bdev_nvme_clear_io_path_cache,
    1529             :                               NULL,
    1530             :                               bdev_nvme_clear_io_path_caches_done);
    1531             : }
    1532             : 
    /*               : Find the nvme_qpair in a poll group that wraps the given
     *               : spdk_nvme_qpair.
     *               :
     *               : Returns the first entry of group->qpair_list whose qpair field equals
     *               : qpair. When no entry matches, the value the loop variable holds after
     *               : the list has been exhausted is returned; bdev_nvme_disconnected_qpair_cb()
     *               : checks the result against NULL.
     */
    1533             : static struct nvme_qpair *
    1534          99 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
    1535             : {
    1536             :         struct nvme_qpair *nvme_qpair;
    1537             : 
    1538         108 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1539         108 :                 if (nvme_qpair->qpair == qpair) {
    1540          99 :                         break;
    1541             :                 }
    1542             :         }
    1543             : 
    1544          99 :         return nvme_qpair;
    1545             : }
    1546             : 
    1547             : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
    1548             : 
    /*               : Callback passed to spdk_nvme_poll_group_process_completions() for
     *               : qpairs found to be disconnected; poll_group_ctx is the nvme_poll_group.
     *               :
     *               : The matching nvme_qpair is looked up (nothing is done if there is
     *               : none), its I/O qpair is freed and the pointer cleared, and the I/O path
     *               : caches that reference it are cleared. Then:
     *               :  - ctrlr_ch present, reset_iter set: the reset channel iteration is
     *               :    continued with status -1 if a connect poller was still registered
     *               :    (it is unregistered first), or 0 otherwise; reset_iter is cleared.
     *               :  - ctrlr_ch present, no reset_iter: a controller failover is started.
     *               :  - ctrlr_ch absent: the nvme_qpair itself is deleted.
     */
    1549             : static void
    1550          99 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
    1551             : {
    1552          99 :         struct nvme_poll_group *group = poll_group_ctx;
    1553             :         struct nvme_qpair *nvme_qpair;
    1554             :         struct nvme_ctrlr_channel *ctrlr_ch;
    1555             :         int status;
    1556             : 
    1557          99 :         nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
    1558          99 :         if (nvme_qpair == NULL) {
    1559           0 :                 return;
    1560             :         }
    1561             : 
    1562          99 :         if (nvme_qpair->qpair != NULL) {
    1563          99 :                 spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
    1564          99 :                 nvme_qpair->qpair = NULL;
    1565             :         }
    1566             : 
    1567          99 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1568             : 
    1569          99 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
    1570             : 
    1571          99 :         if (ctrlr_ch != NULL) {
    1572          56 :                 if (ctrlr_ch->reset_iter != NULL) {
    1573             :                         /* We are in a full reset sequence. */
    1574          52 :                         if (ctrlr_ch->connect_poller != NULL) {
    1575             :                                 /* The qpair failed to connect. Abort the reset sequence. */
    1576           0 :                                 SPDK_DEBUGLOG(bdev_nvme, "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
    1577             :                                               qpair);
    1578           0 :                                 spdk_poller_unregister(&ctrlr_ch->connect_poller);
    1579           0 :                                 status = -1;
    1580             :                         } else {
    1581             :                                 /* The qpair finished disconnecting. Just move on to the next ctrlr_channel. */
    1582          52 :                                 SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
    1583             :                                               qpair);
    1584          52 :                                 status = 0;
    1585             :                         }
    1586          52 :                         spdk_for_each_channel_continue(ctrlr_ch->reset_iter, status);
    1587          52 :                         ctrlr_ch->reset_iter = NULL;
    1588             :                 } else {
    1589             :                         /* qpair was disconnected unexpectedly. Reset controller for recovery. */
    1590           4 :                         SPDK_NOTICELOG("qpair %p was disconnected and freed. reset controller.\n", qpair);
    1591           4 :                         bdev_nvme_failover_ctrlr(nvme_qpair->ctrlr);
    1592             :                 }
    1593             :         } else {
    1594             :                 /* In this case, ctrlr_channel is already deleted. */
    1595          43 :                 SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed. delete nvme_qpair.\n", qpair);
    1596          43 :                 nvme_qpair_delete(nvme_qpair);
    1597             :         }
    1598             : }
    1599             : 
    /*               : Clear the I/O path caches of every qpair in the poll group that reports
     *               : a failure.
     *               :
     *               : Entries without an I/O qpair or without a ctrlr_ch are skipped. For the
     *               : rest, the caches are cleared when the qpair's failure reason is anything
     *               : other than SPDK_NVME_QPAIR_FAILURE_NONE.
     */
    1600             : static void
    1601           0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
    1602             : {
    1603             :         struct nvme_qpair *nvme_qpair;
    1604             : 
    1605           0 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1606           0 :                 if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
    1607           0 :                         continue;
    1608             :                 }
    1609             : 
    1610           0 :                 if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
    1611             :                     SPDK_NVME_QPAIR_FAILURE_NONE) {
    1612           0 :                         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1613             :                 }
    1614             :         }
    1615           0 : }
    1616             : 
    /*               : Poller body that processes I/O completions for an nvme_poll_group
     *               : (passed as arg).
     *               :
     *               : Completions are processed for the whole group, with
     *               : bdev_nvme_disconnected_qpair_cb() handling disconnected qpairs. When
     *               : collect_spin_stat is set, tick bookkeeping is maintained: start_ticks is
     *               : set when it is 0, end_ticks is refreshed on every poll that yields no
     *               : completions, and on a poll with completions the end - start interval is
     *               : added to spin_ticks (if end_ticks is non-zero) and both marks are reset.
     *               : A negative completion count triggers a check of the group's qpairs for
     *               : failures.
     *               :
     *               : Returns SPDK_POLLER_BUSY if at least one completion was processed,
     *               : SPDK_POLLER_IDLE otherwise (including on a negative result).
     */
    1617             : static int
    1618        1018 : bdev_nvme_poll(void *arg)
    1619             : {
    1620        1018 :         struct nvme_poll_group *group = arg;
    1621             :         int64_t num_completions;
    1622             : 
    1623        1018 :         if (group->collect_spin_stat && group->start_ticks == 0) {
    1624           0 :                 group->start_ticks = spdk_get_ticks();
    1625             :         }
    1626             : 
    /*               :         The second argument 0 presumably means "no per-qpair completion
     *               :         limit" - TODO confirm against the poll group API documentation. */
    1627        1018 :         num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
    1628             :                           bdev_nvme_disconnected_qpair_cb);
    1629        1018 :         if (group->collect_spin_stat) {
    1630           0 :                 if (num_completions > 0) {
    1631           0 :                         if (group->end_ticks != 0) {
    1632           0 :                                 group->spin_ticks += (group->end_ticks - group->start_ticks);
    1633           0 :                                 group->end_ticks = 0;
    1634             :                         }
    1635           0 :                         group->start_ticks = 0;
    1636             :                 } else {
    1637           0 :                         group->end_ticks = spdk_get_ticks();
    1638             :                 }
    1639             :         }
    1640             : 
    1641        1018 :         if (spdk_unlikely(num_completions < 0)) {
    1642           0 :                 bdev_nvme_check_io_qpairs(group);
    1643             :         }
    1644             : 
    1645        1018 :         return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
    1646             : }
    1647             : 
    1648             : static int bdev_nvme_poll_adminq(void *arg);
    1649             : 
    /*               : Change the period of a controller's admin queue poller.
     *               :
     *               : The existing adminq_timer_poller is unregistered and a new one running
     *               : bdev_nvme_poll_adminq() is registered with a period of new_period_us.
     */
    1650             : static void
    1651         100 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
    1652             : {
    1653         100 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
    1654             : 
    1655         100 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
    1656             :                                           nvme_ctrlr, new_period_us);
    1657         100 : }
    1658             : 
    /*               : Poller body that processes admin queue completions for a controller
     *               : (passed as arg, must be non-NULL).
     *               :
     *               : If processing returns a negative value: a pending disconnected_cb is
     *               : taken and cleared; when one was set, the admin poll period is set to
     *               : g_opts.nvme_adminq_poll_period_us and the callback is invoked,
     *               : otherwise a controller failover is started.
     *               : If processing did not fail but the admin qpair reports a failure
     *               : reason, the I/O path caches of the controller are cleared.
     *               :
     *               : Returns SPDK_POLLER_IDLE when the processing result is 0 and
     *               : SPDK_POLLER_BUSY for any other result, negative ones included.
     */
    1659             : static int
    1660         146 : bdev_nvme_poll_adminq(void *arg)
    1661             : {
    1662             :         int32_t rc;
    1663         146 :         struct nvme_ctrlr *nvme_ctrlr = arg;
    1664             :         nvme_ctrlr_disconnected_cb disconnected_cb;
    1665             : 
    1666         146 :         assert(nvme_ctrlr != NULL);
    1667             : 
    1668         146 :         rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
    1669         146 :         if (rc < 0) {
    /*               :                 The callback pointer is cleared before it is invoked, so it
     *               :                 runs at most once per registration. */
    1670          53 :                 disconnected_cb = nvme_ctrlr->disconnected_cb;
    1671          53 :                 nvme_ctrlr->disconnected_cb = NULL;
    1672             : 
    1673          53 :                 if (disconnected_cb != NULL) {
    1674          50 :                         bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
    1675             :                                                             g_opts.nvme_adminq_poll_period_us);
    1676          50 :                         disconnected_cb(nvme_ctrlr);
    1677             :                 } else {
    1678           3 :                         bdev_nvme_failover_ctrlr(nvme_ctrlr);
    1679             :                 }
    1680          93 :         } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
    1681             :                    SPDK_NVME_QPAIR_FAILURE_NONE) {
    1682           0 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    1683             :         }
    1684             : 
    1685         146 :         return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
    1686             : }
    1687             : 
    /*               : Release an nvme_bdev; used as the unregister callback passed to
     *               : spdk_io_device_unregister() in bdev_nvme_destruct().
     *               :
     *               : Destroys the bdev's mutex and frees its name, its error statistics and
     *               : the structure itself. io_device is the nvme_bdev pointer.
     */
    1688             : static void
    1689          37 : nvme_bdev_free(void *io_device)
    1690             : {
    1691          37 :         struct nvme_bdev *nvme_disk = io_device;
    1692             : 
    1693          37 :         pthread_mutex_destroy(&nvme_disk->mutex);
    1694          37 :         free(nvme_disk->disk.name);
    1695          37 :         free(nvme_disk->err_stat);
    1696          37 :         free(nvme_disk);
    1697          37 : }
    1698             : 
    1699             : static int
    1700          36 : bdev_nvme_destruct(void *ctx)
    1701             : {
    1702          36 :         struct nvme_bdev *nvme_disk = ctx;
    1703             :         struct nvme_ns *nvme_ns, *tmp_nvme_ns;
    1704             : 
    1705             :         SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);
    1706             : 
    1707          73 :         TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
    1708          37 :                 pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
    1709             : 
    1710          37 :                 nvme_ns->bdev = NULL;
    1711             : 
    1712          37 :                 assert(nvme_ns->id > 0);
    1713             : 
    1714          37 :                 if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
    1715           0 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1716             : 
    1717           0 :                         nvme_ctrlr_release(nvme_ns->ctrlr);
    1718           0 :                         nvme_ns_free(nvme_ns);
    1719             :                 } else {
    1720          37 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1721             :                 }
    1722             :         }
    1723             : 
    1724          36 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    1725          36 :         TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
    1726          36 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    1727             : 
    1728          36 :         spdk_io_device_unregister(nvme_disk, nvme_bdev_free);
    1729             : 
    1730          36 :         return 0;
    1731             : }
    1732             : 
    1733             : static int
    1734         100 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
    1735             : {
    1736             :         struct nvme_ctrlr *nvme_ctrlr;
    1737         100 :         struct spdk_nvme_io_qpair_opts opts;
    1738             :         struct spdk_nvme_qpair *qpair;
    1739             :         int rc;
    1740             : 
    1741         100 :         nvme_ctrlr = nvme_qpair->ctrlr;
    1742             : 
    1743         100 :         spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1744         100 :         opts.delay_cmd_submit = g_opts.delay_cmd_submit;
    1745         100 :         opts.create_only = true;
    1746         100 :         opts.async_mode = true;
    1747         100 :         opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
    1748         100 :         g_opts.io_queue_requests = opts.io_queue_requests;
    1749             : 
    1750         100 :         qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1751         100 :         if (qpair == NULL) {
    1752           0 :                 return -1;
    1753             :         }
    1754             : 
    1755             :         SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
    1756             :                            spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
    1757             : 
    1758         100 :         assert(nvme_qpair->group != NULL);
    1759             : 
    1760         100 :         rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
    1761         100 :         if (rc != 0) {
    1762           0 :                 SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n");
    1763           0 :                 goto err;
    1764             :         }
    1765             : 
    1766         100 :         rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
    1767         100 :         if (rc != 0) {
    1768           0 :                 SPDK_ERRLOG("Unable to connect I/O qpair.\n");
    1769           0 :                 goto err;
    1770             :         }
    1771             : 
    1772         100 :         nvme_qpair->qpair = qpair;
    1773             : 
    1774         100 :         if (!g_opts.disable_auto_failback) {
    1775          71 :                 _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1776             :         }
    1777             : 
    1778         100 :         return 0;
    1779             : 
    1780           0 : err:
    1781           0 :         spdk_nvme_ctrlr_free_io_qpair(qpair);
    1782             : 
    1783           0 :         return rc;
    1784             : }
    1785             : 
    1786             : static void
    1787          82 : bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
    1788             : {
    1789          82 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    1790          82 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
    1791          82 :         enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1792             :         struct spdk_bdev_io *bdev_io;
    1793             : 
    1794          82 :         if (spdk_io_channel_iter_get_ctx(i) != NULL) {
    1795          35 :                 status = SPDK_BDEV_IO_STATUS_FAILED;
    1796             :         }
    1797             : 
    1798          85 :         while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
    1799           3 :                 bdev_io = TAILQ_FIRST(&ctrlr_ch->pending_resets);
    1800           3 :                 TAILQ_REMOVE(&ctrlr_ch->pending_resets, bdev_io, module_link);
    1801           3 :                 __bdev_nvme_io_complete(bdev_io, status, NULL);
    1802             :         }
    1803             : 
    1804          82 :         spdk_for_each_channel_continue(i, 0);
    1805          82 : }
    1806             : 
    1807             : /* This function marks the current trid as failed by storing the current ticks
    1808             :  * and then sets the next trid to the active trid within a controller if exists.
    1809             :  *
    1810             :  * The purpose of the boolean return value is to request the caller to disconnect
    1811             :  * the current trid now to try connecting the next trid.
    1812             :  */
    1813             : static bool
    1814          36 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
    1815             : {
    1816             :         struct nvme_path_id *path_id, *next_path;
    1817             :         int rc __attribute__((unused));
    1818             : 
    1819          36 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    1820          36 :         assert(path_id);
    1821          36 :         assert(path_id == nvme_ctrlr->active_path_id);
    1822          36 :         next_path = TAILQ_NEXT(path_id, link);
    1823             : 
    1824             :         /* Update the last failed time. It means the trid is failed if its last
    1825             :          * failed time is non-zero.
    1826             :          */
    1827          36 :         path_id->last_failed_tsc = spdk_get_ticks();
    1828             : 
    1829          36 :         if (next_path == NULL) {
    1830             :                 /* There is no alternate trid within a controller. */
    1831          25 :                 return false;
    1832             :         }
    1833             : 
    1834          11 :         if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    1835             :                 /* Connect is not retried in a controller reset sequence. Connecting
    1836             :                  * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
    1837             :                  */
    1838           3 :                 return false;
    1839             :         }
    1840             : 
    1841           8 :         assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
    1842             : 
    1843           8 :         SPDK_NOTICELOG("Start failover from %s:%s to %s:%s\n", path_id->trid.traddr,
    1844             :                        path_id->trid.trsvcid,        next_path->trid.traddr, next_path->trid.trsvcid);
    1845             : 
    1846           8 :         spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    1847           8 :         nvme_ctrlr->active_path_id = next_path;
    1848           8 :         rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
    1849           8 :         assert(rc == 0);
    1850           8 :         TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
    1851           8 :         if (!remove) {
    1852             :                 /** Shuffle the old trid to the end of the list and use the new one.
    1853             :                  * Allows for round robin through multiple connections.
    1854             :                  */
    1855           6 :                 TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
    1856             :         } else {
    1857           2 :                 free(path_id);
    1858             :         }
    1859             : 
    1860           8 :         if (start || next_path->last_failed_tsc == 0) {
    1861             :                 /* bdev_nvme_failover_ctrlr() is just called or the next trid is not failed
    1862             :                  * or used yet. Try the next trid now.
    1863             :                  */
    1864           7 :                 return true;
    1865             :         }
    1866             : 
    1867           1 :         if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
    1868           1 :             nvme_ctrlr->opts.reconnect_delay_sec) {
    1869             :                 /* Enough backoff passed since the next trid failed. Try the next trid now. */
    1870           0 :                 return true;
    1871             :         }
    1872             : 
    1873             :         /* The next trid will be tried after reconnect_delay_sec seconds. */
    1874           1 :         return false;
    1875             : }
    1876             : 
    1877             : static bool
    1878          68 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
    1879             : {
    1880             :         int32_t elapsed;
    1881             : 
    1882          68 :         if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
    1883          36 :             nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
    1884          42 :                 return false;
    1885             :         }
    1886             : 
    1887          26 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    1888          26 :         if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
    1889           6 :                 return true;
    1890             :         } else {
    1891          20 :                 return false;
    1892             :         }
    1893             : }
    1894             : 
    1895             : static bool
    1896          12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
    1897             : {
    1898             :         uint32_t elapsed;
    1899             : 
    1900          12 :         if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
    1901           8 :                 return false;
    1902             :         }
    1903             : 
    1904           4 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    1905           4 :         if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
    1906           2 :                 return true;
    1907             :         } else {
    1908           2 :                 return false;
    1909             :         }
    1910             : }
    1911             : 
    1912             : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
    1913             : 
    1914             : static void
    1915          51 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
    1916             : {
    1917             :         int rc;
    1918             : 
    1919          51 :         rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
    1920          51 :         if (rc != 0) {
    1921             :                 /* Disconnect fails if ctrlr is already resetting or removed. In this case,
    1922             :                  * fail the reset sequence immediately.
    1923             :                  */
    1924           1 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    1925           1 :                 return;
    1926             :         }
    1927             : 
    1928             :         /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
    1929             :          * Set callback here to execute the specified operation after ctrlr is really disconnected.
    1930             :          */
    1931          50 :         assert(nvme_ctrlr->disconnected_cb == NULL);
    1932          50 :         nvme_ctrlr->disconnected_cb = cb_fn;
    1933             : 
    1934             :         /* During disconnection, reduce the period to poll adminq more often. */
    1935          50 :         bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
    1936             : }
    1937             : 
    1938             : enum bdev_nvme_op_after_reset {
    1939             :         OP_NONE,
    1940             :         OP_COMPLETE_PENDING_DESTRUCT,
    1941             :         OP_DESTRUCT,
    1942             :         OP_DELAYED_RECONNECT,
    1943             :         OP_FAILOVER,
    1944             : };
    1945             : 
    1946             : typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
    1947             : 
    1948             : static _bdev_nvme_op_after_reset
    1949          50 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
    1950             : {
    1951          50 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    1952             :                 /* Complete pending destruct after reset completes. */
    1953           0 :                 return OP_COMPLETE_PENDING_DESTRUCT;
    1954          50 :         } else if (nvme_ctrlr->pending_failover) {
    1955           3 :                 nvme_ctrlr->pending_failover = false;
    1956           3 :                 nvme_ctrlr->reset_start_tsc = 0;
    1957           3 :                 return OP_FAILOVER;
    1958          47 :         } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    1959          33 :                 nvme_ctrlr->reset_start_tsc = 0;
    1960          33 :                 return OP_NONE;
    1961          14 :         } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    1962           2 :                 return OP_DESTRUCT;
    1963             :         } else {
    1964          12 :                 if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
    1965           2 :                         nvme_ctrlr->fast_io_fail_timedout = true;
    1966             :                 }
    1967          12 :                 return OP_DELAYED_RECONNECT;
    1968             :         }
    1969             : }
    1970             : 
    1971             : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
    1972             : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
    1973             : 
    1974             : static int
    1975           9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
    1976             : {
    1977           9 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    1978             : 
    1979             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
    1980           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1981             : 
    1982           9 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    1983             : 
    1984           9 :         if (!nvme_ctrlr->reconnect_is_delayed) {
    1985           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1986           0 :                 return SPDK_POLLER_BUSY;
    1987             :         }
    1988             : 
    1989           9 :         nvme_ctrlr->reconnect_is_delayed = false;
    1990             : 
    1991           9 :         if (nvme_ctrlr->destruct) {
    1992           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1993           0 :                 return SPDK_POLLER_BUSY;
    1994             :         }
    1995             : 
    1996           9 :         assert(nvme_ctrlr->resetting == false);
    1997           9 :         nvme_ctrlr->resetting = true;
    1998             : 
    1999           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2000             : 
    2001           9 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2002             : 
    2003           9 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2004           9 :         return SPDK_POLLER_BUSY;
    2005             : }
    2006             : 
    2007             : static void
    2008          12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
    2009             : {
    2010          12 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2011             : 
    2012          12 :         assert(nvme_ctrlr->reconnect_is_delayed == false);
    2013          12 :         nvme_ctrlr->reconnect_is_delayed = true;
    2014             : 
    2015          12 :         assert(nvme_ctrlr->reconnect_delay_timer == NULL);
    2016          12 :         nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
    2017             :                                             nvme_ctrlr,
    2018             :                                             nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
    2019          12 : }
    2020             : 
    2021             : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
    2022             : 
    2023             : static void
    2024          48 : _bdev_nvme_reset_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
    2025             : {
    2026          48 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2027          48 :         bool success = spdk_io_channel_iter_get_ctx(i) == NULL;
    2028          48 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2029          48 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2030             :         enum bdev_nvme_op_after_reset op_after_reset;
    2031             : 
    2032          48 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2033             : 
    2034          48 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2035          48 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2036             : 
    2037          48 :         if (!success) {
    2038          21 :                 SPDK_ERRLOG("Resetting controller failed.\n");
    2039             :         } else {
    2040          27 :                 SPDK_NOTICELOG("Resetting controller successful.\n");
    2041             :         }
    2042             : 
    2043          48 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2044          48 :         nvme_ctrlr->resetting = false;
    2045          48 :         nvme_ctrlr->dont_retry = false;
    2046          48 :         nvme_ctrlr->in_failover = false;
    2047             : 
    2048          48 :         op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
    2049          48 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2050             : 
    2051             :         /* Delay callbacks when the next operation is a failover. */
    2052          48 :         if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
    2053          10 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
    2054             :         }
    2055             : 
    2056          48 :         switch (op_after_reset) {
    2057           0 :         case OP_COMPLETE_PENDING_DESTRUCT:
    2058           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2059           0 :                 break;
    2060           2 :         case OP_DESTRUCT:
    2061           2 :                 bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
    2062           2 :                 remove_discovery_entry(nvme_ctrlr);
    2063           2 :                 break;
    2064          12 :         case OP_DELAYED_RECONNECT:
    2065          12 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
    2066          12 :                 break;
    2067           3 :         case OP_FAILOVER:
    2068           3 :                 nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
    2069           3 :                 nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
    2070           3 :                 bdev_nvme_failover_ctrlr(nvme_ctrlr);
    2071           3 :                 break;
    2072          31 :         default:
    2073          31 :                 break;
    2074             :         }
    2075          48 : }
    2076             : 
    2077             : static void
    2078          50 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
    2079             : {
    2080          50 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2081          50 :         if (!success) {
    2082             :                 /* Connecting the active trid failed. Set the next alternate trid to the
    2083             :                  * active trid if it exists.
    2084             :                  */
    2085          23 :                 if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
    2086             :                         /* The next alternate trid exists and is ready to try. Try it now. */
    2087           2 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2088             : 
    2089           2 :                         nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2090           2 :                         return;
    2091             :                 }
    2092             : 
    2093             :                 /* We came here if there is no alternate trid or if the next trid exists but
    2094             :                  * is not ready to try. We will try the active trid after reconnect_delay_sec
    2095             :                  * seconds if it is non-zero or at the next reset call otherwise.
    2096             :                  */
    2097             :         } else {
    2098             :                 /* Connecting the active trid succeeded. Clear the last failed time because it
    2099             :                  * means the trid is failed if its last failed time is non-zero.
    2100             :                  */
    2101          27 :                 nvme_ctrlr->active_path_id->last_failed_tsc = 0;
    2102             :         }
    2103          48 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2104             : 
    2105             :         /* Make sure we clear any pending resets before returning. */
    2106          48 :         spdk_for_each_channel(nvme_ctrlr,
    2107             :                               bdev_nvme_complete_pending_resets,
    2108             :                               success ? NULL : (void *)0x1,
    2109             :                               _bdev_nvme_reset_ctrlr_complete);
    2110             : }
    2111             : 
    2112             : static void
    2113           0 : bdev_nvme_reset_create_qpairs_failed(struct spdk_io_channel_iter *i, int status)
    2114             : {
    2115           0 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2116             : 
    2117           0 :         bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2118           0 : }
    2119             : 
    2120             : static void
    2121          62 : bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
    2122             : {
    2123          62 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
    2124          62 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
    2125             :         struct nvme_qpair *nvme_qpair;
    2126             : 
    2127          62 :         nvme_qpair = ctrlr_ch->qpair;
    2128          62 :         assert(nvme_qpair != NULL);
    2129             : 
    2130          62 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    2131             : 
    2132          62 :         if (nvme_qpair->qpair != NULL) {
    2133          52 :                 if (nvme_qpair->ctrlr->dont_retry) {
    2134          39 :                         spdk_nvme_qpair_set_abort_dnr(nvme_qpair->qpair, true);
    2135             :                 }
    2136          52 :                 spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
    2137             : 
    2138             :                 /* The current full reset sequence will move to the next
    2139             :                  * ctrlr_channel after the qpair is actually disconnected.
    2140             :                  */
    2141          52 :                 assert(ctrlr_ch->reset_iter == NULL);
    2142          52 :                 ctrlr_ch->reset_iter = i;
    2143             :         } else {
    2144          10 :                 spdk_for_each_channel_continue(i, 0);
    2145             :         }
    2146          62 : }
    2147             : 
    2148             : static void
    2149          27 : bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
    2150             : {
    2151          27 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2152             : 
    2153          27 :         if (status == 0) {
    2154          27 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
    2155             :         } else {
    2156             :                 /* Delete the added qpairs and quiesce ctrlr to make the states clean. */
    2157           0 :                 spdk_for_each_channel(nvme_ctrlr,
    2158             :                                       bdev_nvme_reset_destroy_qpair,
    2159             :                                       NULL,
    2160             :                                       bdev_nvme_reset_create_qpairs_failed);
    2161             :         }
    2162          27 : }
    2163             : 
    2164             : static int
    2165          43 : bdev_nvme_reset_check_qpair_connected(void *ctx)
    2166             : {
    2167          43 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx;
    2168             : 
    2169          43 :         if (ctrlr_ch->reset_iter == NULL) {
    2170             :                 /* qpair was already failed to connect and the reset sequence is being aborted. */
    2171           0 :                 assert(ctrlr_ch->connect_poller == NULL);
    2172           0 :                 assert(ctrlr_ch->qpair->qpair == NULL);
    2173           0 :                 return SPDK_POLLER_BUSY;
    2174             :         }
    2175             : 
    2176          43 :         assert(ctrlr_ch->qpair->qpair != NULL);
    2177             : 
    2178          43 :         if (!spdk_nvme_qpair_is_connected(ctrlr_ch->qpair->qpair)) {
    2179           0 :                 return SPDK_POLLER_BUSY;
    2180             :         }
    2181             : 
    2182          43 :         spdk_poller_unregister(&ctrlr_ch->connect_poller);
    2183             : 
    2184             :         /* qpair was completed to connect. Move to the next ctrlr_channel */
    2185          43 :         spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    2186          43 :         ctrlr_ch->reset_iter = NULL;
    2187             : 
    2188          43 :         if (!g_opts.disable_auto_failback) {
    2189          30 :                 _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
    2190             :         }
    2191             : 
    2192          43 :         return SPDK_POLLER_BUSY;
    2193             : }
    2194             : 
    2195             : static void
    2196          43 : bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
    2197             : {
    2198          43 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    2199          43 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
    2200             :         int rc;
    2201             : 
    2202          43 :         rc = bdev_nvme_create_qpair(ctrlr_ch->qpair);
    2203          43 :         if (rc == 0) {
    2204          43 :                 ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
    2205             :                                            ctrlr_ch, 0);
    2206             : 
    2207             :                 /* The current full reset sequence will move to the next
    2208             :                  * ctrlr_channel after the qpair is actually connected.
    2209             :                  */
    2210          43 :                 assert(ctrlr_ch->reset_iter == NULL);
    2211          43 :                 ctrlr_ch->reset_iter = i;
    2212             :         } else {
    2213           0 :                 spdk_for_each_channel_continue(i, rc);
    2214             :         }
    2215          43 : }
    2216             : 
    2217             : static void
    2218          27 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    2219             : {
    2220          27 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    2221             :         struct nvme_ns *nvme_ns;
    2222             : 
    2223          39 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    2224             :              nvme_ns != NULL;
    2225          12 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    2226          12 :                 if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    2227           1 :                         SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
    2228             :                         /* NS can be added again. Just nullify nvme_ns->ns. */
    2229           1 :                         nvme_ns->ns = NULL;
    2230             :                 }
    2231             :         }
    2232          27 : }
    2233             : 
    2234             : 
    2235             : static int
    2236          49 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
    2237             : {
    2238          49 :         struct nvme_ctrlr *nvme_ctrlr = arg;
    2239          49 :         int rc = -ETIMEDOUT;
    2240             : 
    2241          49 :         if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    2242             :                 /* Mark the ctrlr as failed. The next call to
    2243             :                  * spdk_nvme_ctrlr_reconnect_poll_async() will then
    2244             :                  * do the necessary cleanup and return failure.
    2245             :                  */
    2246           2 :                 spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    2247             :         }
    2248             : 
    2249          49 :         rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
    2250          49 :         if (rc == -EAGAIN) {
    2251           0 :                 return SPDK_POLLER_BUSY;
    2252             :         }
    2253             : 
    2254          49 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
    2255          49 :         if (rc == 0) {
    2256          27 :                 nvme_ctrlr_check_namespaces(nvme_ctrlr);
    2257             : 
    2258             :                 /* Recreate all of the I/O queue pairs */
    2259          27 :                 spdk_for_each_channel(nvme_ctrlr,
    2260             :                                       bdev_nvme_reset_create_qpair,
    2261             :                                       NULL,
    2262             :                                       bdev_nvme_reset_create_qpairs_done);
    2263             :         } else {
    2264          22 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2265             :         }
    2266          49 :         return SPDK_POLLER_BUSY;
    2267             : }
    2268             : 
    2269             : static void
    2270          49 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2271             : {
    2272          49 :         spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
    2273             : 
    2274             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
    2275          49 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
    2276          49 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
    2277             :                                           nvme_ctrlr, 0);
    2278          49 : }
    2279             : 
    2280             : static void
    2281          36 : bdev_nvme_reset_destroy_qpair_done(struct spdk_io_channel_iter *i, int status)
    2282             : {
    2283          36 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2284             : 
    2285             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
    2286          36 :         assert(status == 0);
    2287             : 
    2288          36 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2289           0 :                 bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2290             :         } else {
    2291          36 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2292             :         }
    2293          36 : }
    2294             : 
    2295             : static void
    2296          36 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2297             : {
    2298          36 :         spdk_for_each_channel(nvme_ctrlr,
    2299             :                               bdev_nvme_reset_destroy_qpair,
    2300             :                               NULL,
    2301             :                               bdev_nvme_reset_destroy_qpair_done);
    2302          36 : }
    2303             : 
    2304             : static void
    2305           3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
    2306             : {
    2307           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2308             : 
    2309           3 :         assert(nvme_ctrlr->resetting == true);
    2310           3 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2311             : 
    2312           3 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2313             : 
    2314           3 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2315             : 
    2316           3 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2317           3 : }
    2318             : 
    2319             : static void
    2320          36 : _bdev_nvme_reset_ctrlr(void *ctx)
    2321             : {
    2322          36 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2323             : 
    2324          36 :         assert(nvme_ctrlr->resetting == true);
    2325          36 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2326             : 
    2327          36 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2328           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
    2329             :         } else {
    2330          36 :                 bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
    2331             :         }
    2332          36 : }
    2333             : 
    2334             : static int
    2335          33 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2336             : {
    2337             :         spdk_msg_fn msg_fn;
    2338             : 
    2339          33 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2340          33 :         if (nvme_ctrlr->destruct) {
    2341           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2342           3 :                 return -ENXIO;
    2343             :         }
    2344             : 
    2345          30 :         if (nvme_ctrlr->resetting) {
    2346           5 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2347           5 :                 SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
    2348           5 :                 return -EBUSY;
    2349             :         }
    2350             : 
    2351          25 :         if (nvme_ctrlr->disabled) {
    2352           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2353           0 :                 SPDK_NOTICELOG("Unable to perform reset. Controller is disabled.\n");
    2354           0 :                 return -EALREADY;
    2355             :         }
    2356             : 
    2357          25 :         nvme_ctrlr->resetting = true;
    2358          25 :         nvme_ctrlr->dont_retry = true;
    2359             : 
    2360          25 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2361           1 :                 SPDK_DEBUGLOG(bdev_nvme, "Reconnect is already scheduled.\n");
    2362           1 :                 msg_fn = bdev_nvme_reconnect_ctrlr_now;
    2363           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2364             :         } else {
    2365          24 :                 msg_fn = _bdev_nvme_reset_ctrlr;
    2366          24 :                 assert(nvme_ctrlr->reset_start_tsc == 0);
    2367             :         }
    2368             : 
    2369          25 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2370             : 
    2371          25 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2372             : 
    2373          25 :         spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2374          25 :         return 0;
    2375             : }
    2376             : 
    2377             : static int
    2378           3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2379             : {
    2380           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2381           3 :         if (nvme_ctrlr->destruct) {
    2382           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2383           0 :                 return -ENXIO;
    2384             :         }
    2385             : 
    2386           3 :         if (nvme_ctrlr->resetting) {
    2387           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2388           0 :                 return -EBUSY;
    2389             :         }
    2390             : 
    2391           3 :         if (!nvme_ctrlr->disabled) {
    2392           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2393           1 :                 return -EALREADY;
    2394             :         }
    2395             : 
    2396           2 :         nvme_ctrlr->disabled = false;
    2397           2 :         nvme_ctrlr->resetting = true;
    2398             : 
    2399           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2400             : 
    2401           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2402             : 
    2403           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
    2404           2 :         return 0;
    2405             : }
    2406             : 
    2407             : static void
    2408           2 : _bdev_nvme_disable_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
    2409             : {
    2410           2 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2411           2 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2412           2 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2413             :         enum bdev_nvme_op_after_reset op_after_disable;
    2414             : 
    2415           2 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2416             : 
    2417           2 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2418           2 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2419             : 
    2420           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2421             : 
    2422           2 :         nvme_ctrlr->resetting = false;
    2423           2 :         nvme_ctrlr->dont_retry = false;
    2424             : 
    2425           2 :         op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
    2426             : 
    2427           2 :         nvme_ctrlr->disabled = true;
    2428           2 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2429             : 
    2430           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2431             : 
    2432           2 :         if (ctrlr_op_cb_fn) {
    2433           0 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
    2434             :         }
    2435             : 
    2436           2 :         switch (op_after_disable) {
    2437           0 :         case OP_COMPLETE_PENDING_DESTRUCT:
    2438           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2439           0 :                 break;
    2440           2 :         default:
    2441           2 :                 break;
    2442             :         }
    2443             : 
    2444           2 : }
    2445             : 
    2446             : static void
    2447           2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
    2448             : {
    2449             :         /* Make sure we clear any pending resets before returning. */
    2450           2 :         spdk_for_each_channel(nvme_ctrlr,
    2451             :                               bdev_nvme_complete_pending_resets,
    2452             :                               NULL,
    2453             :                               _bdev_nvme_disable_ctrlr_complete);
    2454           2 : }
    2455             : 
    2456             : static void
    2457           1 : bdev_nvme_disable_destroy_qpairs_done(struct spdk_io_channel_iter *i, int status)
    2458             : {
    2459           1 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
    2460             : 
    2461           1 :         assert(status == 0);
    2462             : 
    2463           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2464           0 :                 bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2465             :         } else {
    2466           1 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
    2467             :         }
    2468           1 : }
    2469             : 
    2470             : static void
    2471           1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2472             : {
    2473           1 :         spdk_for_each_channel(nvme_ctrlr,
    2474             :                               bdev_nvme_reset_destroy_qpair,
    2475             :                               NULL,
    2476             :                               bdev_nvme_disable_destroy_qpairs_done);
    2477           1 : }
    2478             : 
    2479             : static void
    2480           1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
    2481             : {
    2482           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2483             : 
    2484           1 :         assert(nvme_ctrlr->resetting == true);
    2485           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2486             : 
    2487           1 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2488             : 
    2489           1 :         bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2490           1 : }
    2491             : 
    2492             : static void
    2493           1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
    2494             : {
    2495           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2496             : 
    2497           1 :         assert(nvme_ctrlr->resetting == true);
    2498           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2499             : 
    2500           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2501           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
    2502             :         } else {
    2503           1 :                 bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
    2504             :         }
    2505           1 : }
    2506             : 
    2507             : static int
    2508           5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2509             : {
    2510             :         spdk_msg_fn msg_fn;
    2511             : 
    2512           5 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2513           5 :         if (nvme_ctrlr->destruct) {
    2514           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2515           1 :                 return -ENXIO;
    2516             :         }
    2517             : 
    2518           4 :         if (nvme_ctrlr->resetting) {
    2519           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2520           1 :                 return -EBUSY;
    2521             :         }
    2522             : 
    2523           3 :         if (nvme_ctrlr->disabled) {
    2524           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2525           1 :                 return -EALREADY;
    2526             :         }
    2527             : 
    2528           2 :         nvme_ctrlr->resetting = true;
    2529           2 :         nvme_ctrlr->dont_retry = true;
    2530             : 
    2531           2 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2532           1 :                 msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
    2533           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2534             :         } else {
    2535           1 :                 msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
    2536             :         }
    2537             : 
    2538           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2539             : 
    2540           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2541             : 
    2542           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2543           2 :         return 0;
    2544             : }
    2545             : 
    2546             : static int
    2547          15 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2548             :               bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2549             : {
    2550             :         int rc;
    2551             : 
    2552          15 :         switch (op) {
    2553          14 :         case NVME_CTRLR_OP_RESET:
    2554          14 :                 rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
    2555          14 :                 break;
    2556           0 :         case NVME_CTRLR_OP_ENABLE:
    2557           0 :                 rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
    2558           0 :                 break;
    2559           0 :         case NVME_CTRLR_OP_DISABLE:
    2560           0 :                 rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
    2561           0 :                 break;
    2562           1 :         default:
    2563           1 :                 rc = -EINVAL;
    2564           1 :                 break;
    2565             :         }
    2566             : 
    2567          15 :         if (rc == 0) {
    2568           9 :                 assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
    2569           9 :                 assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
    2570           9 :                 nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
    2571           9 :                 nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
    2572             :         }
    2573          15 :         return rc;
    2574             : }
    2575             : 
    2576             : struct nvme_ctrlr_op_rpc_ctx {
    2577             :         struct nvme_ctrlr *nvme_ctrlr;
    2578             :         struct spdk_thread *orig_thread;
    2579             :         enum nvme_ctrlr_op op;
    2580             :         int rc;
    2581             :         bdev_nvme_ctrlr_op_cb cb_fn;
    2582             :         void *cb_arg;
    2583             : };
    2584             : 
    2585             : static void
    2586           4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
    2587             : {
    2588           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2589             : 
    2590           4 :         assert(ctx != NULL);
    2591           4 :         assert(ctx->cb_fn != NULL);
    2592             : 
    2593           4 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2594             : 
    2595           4 :         free(ctx);
    2596           4 : }
    2597             : 
    2598             : static void
    2599           4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
    2600             : {
    2601           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2602             : 
    2603           4 :         ctx->rc = rc;
    2604             : 
    2605           4 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
    2606           4 : }
    2607             : 
    2608             : void
    2609           4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2610             :                   bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2611             : {
    2612             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2613             :         int rc;
    2614             : 
    2615           4 :         assert(cb_fn != NULL);
    2616             : 
    2617           4 :         ctx = calloc(1, sizeof(*ctx));
    2618           4 :         if (ctx == NULL) {
    2619           0 :                 SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2620           0 :                 cb_fn(cb_arg, -ENOMEM);
    2621           0 :                 return;
    2622             :         }
    2623             : 
    2624           4 :         ctx->orig_thread = spdk_get_thread();
    2625           4 :         ctx->cb_fn = cb_fn;
    2626           4 :         ctx->cb_arg = cb_arg;
    2627             : 
    2628           4 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
    2629           4 :         if (rc == 0) {
    2630           1 :                 return;
    2631           3 :         } else if (rc == -EALREADY) {
    2632           0 :                 rc = 0;
    2633             :         }
    2634             : 
    2635           3 :         nvme_ctrlr_op_rpc_complete(ctx, rc);
    2636             : }
    2637             : 
    2638             : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
    2639             : 
    2640             : static void
    2641           2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
    2642             : {
    2643           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2644             :         struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
    2645             :         int rc;
    2646             : 
    2647           2 :         prev_nvme_ctrlr = ctx->nvme_ctrlr;
    2648           2 :         ctx->nvme_ctrlr = NULL;
    2649             : 
    2650           2 :         if (ctx->rc != 0) {
    2651           0 :                 goto complete;
    2652             :         }
    2653             : 
    2654           2 :         next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
    2655           2 :         if (next_nvme_ctrlr == NULL) {
    2656           1 :                 goto complete;
    2657             :         }
    2658             : 
    2659           1 :         rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2660           1 :         if (rc == 0) {
    2661           1 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2662           1 :                 return;
    2663           0 :         } else if (rc == -EALREADY) {
    2664           0 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2665           0 :                 rc = 0;
    2666             :         }
    2667             : 
    2668           0 :         ctx->rc = rc;
    2669             : 
    2670           1 : complete:
    2671           1 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2672           1 :         free(ctx);
    2673             : }
    2674             : 
    2675             : static void
    2676           2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
    2677             : {
    2678           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2679             : 
    2680           2 :         ctx->rc = rc;
    2681             : 
    2682           2 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2683           2 : }
    2684             : 
    2685             : void
    2686           1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
    2687             :                        bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2688             : {
    2689             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2690             :         struct nvme_ctrlr *nvme_ctrlr;
    2691             :         int rc;
    2692             : 
    2693           1 :         assert(cb_fn != NULL);
    2694             : 
    2695           1 :         ctx = calloc(1, sizeof(*ctx));
    2696           1 :         if (ctx == NULL) {
    2697           0 :                 SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2698           0 :                 cb_fn(cb_arg, -ENOMEM);
    2699           0 :                 return;
    2700             :         }
    2701             : 
    2702           1 :         ctx->orig_thread = spdk_get_thread();
    2703           1 :         ctx->op = op;
    2704           1 :         ctx->cb_fn = cb_fn;
    2705           1 :         ctx->cb_arg = cb_arg;
    2706             : 
    2707           1 :         nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
    2708           1 :         assert(nvme_ctrlr != NULL);
    2709             : 
    2710           1 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2711           1 :         if (rc == 0) {
    2712           1 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2713           1 :                 return;
    2714           0 :         } else if (rc == -EALREADY) {
    2715           0 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2716           0 :                 rc = 0;
    2717             :         }
    2718             : 
    2719           0 :         nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
    2720             : }
    2721             : 
    2722             : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
    2723             : 
    2724             : static void
    2725           4 : _bdev_nvme_reset_io_complete(struct spdk_io_channel_iter *i, int status)
    2726             : {
    2727           4 :         struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
    2728             :         enum spdk_bdev_io_status io_status;
    2729             : 
    2730           4 :         if (bio->cpl.cdw0 == 0) {
    2731           3 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    2732             :         } else {
    2733           1 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    2734             :         }
    2735             : 
    2736           4 :         __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
    2737           4 : }
    2738             : 
    2739             : static void
    2740           8 : bdev_nvme_abort_bdev_channel(struct spdk_io_channel_iter *i)
    2741             : {
    2742           8 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    2743           8 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    2744             : 
    2745           8 :         bdev_nvme_abort_retry_ios(nbdev_ch);
    2746             : 
    2747           8 :         spdk_for_each_channel_continue(i, 0);
    2748           8 : }
    2749             : 
    2750             : static void
    2751           4 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
    2752             : {
    2753           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    2754           4 :         struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
    2755             : 
    2756             :         /* Abort all queued I/Os for retry. */
    2757           4 :         spdk_for_each_channel(nbdev,
    2758             :                               bdev_nvme_abort_bdev_channel,
    2759             :                               bio,
    2760             :                               _bdev_nvme_reset_io_complete);
    2761           4 : }
    2762             : 
    2763             : static void
    2764           6 : _bdev_nvme_reset_io_continue(void *ctx)
    2765             : {
    2766           6 :         struct nvme_bdev_io *bio = ctx;
    2767             :         struct nvme_io_path *prev_io_path, *next_io_path;
    2768             :         int rc;
    2769             : 
    2770           6 :         prev_io_path = bio->io_path;
    2771           6 :         bio->io_path = NULL;
    2772             : 
    2773           6 :         if (bio->cpl.cdw0 != 0) {
    2774           1 :                 goto complete;
    2775             :         }
    2776             : 
    2777           5 :         next_io_path = STAILQ_NEXT(prev_io_path, stailq);
    2778           5 :         if (next_io_path == NULL) {
    2779           3 :                 goto complete;
    2780             :         }
    2781             : 
    2782           2 :         rc = _bdev_nvme_reset_io(next_io_path, bio);
    2783           2 :         if (rc == 0) {
    2784           2 :                 return;
    2785             :         }
    2786             : 
    2787           0 :         bio->cpl.cdw0 = 1;
    2788             : 
    2789           4 : complete:
    2790           4 :         bdev_nvme_reset_io_complete(bio);
    2791             : }
    2792             : 
    2793             : static void
    2794           6 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
    2795             : {
    2796           6 :         struct nvme_bdev_io *bio = cb_arg;
    2797           6 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    2798             : 
    2799           6 :         bio->cpl.cdw0 = (rc == 0) ? 0 : 1;
    2800             : 
    2801           6 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
    2802           6 : }
    2803             : 
    2804             : static int
    2805           9 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
    2806             : {
    2807             :         struct nvme_ctrlr_channel *ctrlr_ch;
    2808             :         struct spdk_bdev_io *bdev_io;
    2809             :         int rc;
    2810             : 
    2811           9 :         rc = nvme_ctrlr_op(io_path->qpair->ctrlr, NVME_CTRLR_OP_RESET,
    2812             :                            bdev_nvme_reset_io_continue, bio);
    2813           9 :         if (rc == 0) {
    2814           6 :                 assert(bio->io_path == NULL);
    2815           6 :                 bio->io_path = io_path;
    2816           3 :         } else if (rc == -EBUSY) {
    2817           3 :                 ctrlr_ch = io_path->qpair->ctrlr_ch;
    2818           3 :                 assert(ctrlr_ch != NULL);
    2819             :                 /*
    2820             :                  * Reset call is queued only if it is from the app framework. This is on purpose so that
    2821             :                  * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
    2822             :                  * upper level. If they are in the middle of a reset, we won't try to schedule another one.
    2823             :                  */
    2824           3 :                 bdev_io = spdk_bdev_io_from_ctx(bio);
    2825           3 :                 TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bdev_io, module_link);
    2826           3 :                 rc = 0;
    2827             :         }
    2828             : 
    2829           9 :         return rc;
    2830             : }
    2831             : 
    2832             : static void
    2833           7 : bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio)
    2834             : {
    2835             :         struct nvme_io_path *io_path;
    2836             :         int rc;
    2837             : 
    2838           7 :         bio->cpl.cdw0 = 0;
    2839             : 
    2840             :         /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
    2841           7 :         io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
    2842           7 :         assert(io_path != NULL);
    2843             : 
    2844           7 :         rc = _bdev_nvme_reset_io(io_path, bio);
    2845           7 :         if (rc != 0) {
    2846             :                 /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
    2847           0 :                 bdev_nvme_reset_io_continue(bio, rc == -EALREADY);
    2848             :         }
    2849           7 : }
    2850             : 
    /*
     * Decide whether a failover can start on nvme_ctrlr and, if so, mark it started.
     *
     * NOTE(review): this function takes no lock itself; the caller visible in this
     * file, bdev_nvme_failover_ctrlr(), holds nvme_ctrlr->mutex around the call.
     * Presumably every caller must do the same - confirm.
     *
     * \param nvme_ctrlr Controller to fail over.
     * \param remove Forwarded unchanged to bdev_nvme_failover_trid().
     *
     * \return 0 when the failover was started: resetting and in_failover are set and
     *         reset_start_tsc is stamped with spdk_get_ticks().
     * \return -ENXIO when nvme_ctrlr->destruct is set.
     * \return -EINPROGRESS when a reset that is not a failover is running;
     *         pending_failover is set so the failover is deferred.
     * \return -EBUSY when a failover is already running.
     * \return -EALREADY when a reconnect is already scheduled or the controller is
     *         disabled; in both cases bdev_nvme_failover_trid() has already been called.
     */
    2851             : static int
    2852          18 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
    2853             : {
    2854          18 :         if (nvme_ctrlr->destruct) {
    2855             :                 /* Don't bother resetting if the controller is in the process of being destructed. */
    2856           2 :                 return -ENXIO;
    2857             :         }
    2858             : 
    2859          16 :         if (nvme_ctrlr->resetting) {
    2860           3 :                 if (!nvme_ctrlr->in_failover) {
    2861           3 :                         SPDK_NOTICELOG("Reset is already in progress. Defer failover until reset completes.\n");
    2862             : 
    2863             :                         /* Defer failover until reset completes. */
    2864           3 :                         nvme_ctrlr->pending_failover = true;
    2865           3 :                         return -EINPROGRESS;
    2866             :                 } else {
    2867           0 :                         SPDK_NOTICELOG("Unable to perform failover, already in progress.\n");
    2868           0 :                         return -EBUSY;
    2869             :                 }
    2870             :         }
    2871             : 
    2872          13 :         bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
    2873             : 
    2874          13 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2875           1 :                 SPDK_NOTICELOG("Reconnect is already scheduled.\n");
    2876             : 
    2877             :                 /* We rely on the next reconnect for the failover. */
    2878           1 :                 return -EALREADY;
    2879             :         }
    2880             : 
    2881          12 :         if (nvme_ctrlr->disabled) {
    2882           0 :                 SPDK_NOTICELOG("Controller is disabled.\n");
    2883             : 
    2884             :                 /* We rely on the enablement for the failover. */
    2885           0 :                 return -EALREADY;
    2886             :         }
    2887             : 
    2888          12 :         nvme_ctrlr->resetting = true;
    2889          12 :         nvme_ctrlr->in_failover = true;
    2890             : 
    2891          12 :         assert(nvme_ctrlr->reset_start_tsc == 0);
    2892          12 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2893             : 
    2894          12 :         return 0;
    2895             : }
    2896             : 
    /*
     * Locked entry point for starting a failover on nvme_ctrlr.
     *
     * Calls bdev_nvme_failover_ctrlr_unsafe() with remove == false while holding
     * nvme_ctrlr->mutex. If that returns 0, _bdev_nvme_reset_ctrlr is sent as a
     * message to nvme_ctrlr->thread after the mutex is released.
     *
     * \return 0 when the failover was started, or when the helper returned -EALREADY.
     * \return any other negative value returned by the helper, unchanged.
     */
    2897             : static int
    2898          16 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2899             : {
    2900             :         int rc;
    2901             : 
    2902          16 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2903          16 :         rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
    2904          16 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2905             : 
    2906          16 :         if (rc == 0) {
    2907          11 :                 spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
    2908           5 :         } else if (rc == -EALREADY) {
                                       /* -EALREADY is reported to the caller as success. */
    2909           0 :                 rc = 0;
    2910             :         }
    2911             : 
    2912          16 :         return rc;
    2913             : }
    2914             : 
    /*
     * Forward declarations for functions that _bdev_nvme_submit_request() calls
     * before their definitions appear.
     */
    2915             : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    2916             :                            uint64_t num_blocks);
    2917             : 
    2918             : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    2919             :                                   uint64_t num_blocks);
    2920             : 
    2921             : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
    2922             :                           uint64_t src_offset_blocks,
    2923             :                           uint64_t num_blocks);
    2924             : 
    /*
     * Callback passed to spdk_bdev_io_get_buf() by the READ case of
     * _bdev_nvme_submit_request(); issues the read once a buffer is available.
     *
     * \param ch Not used in this function.
     * \param bdev_io The read I/O; its driver_ctx is the struct nvme_bdev_io.
     * \param success false makes the I/O complete with -EINVAL.
     *
     * If bio->io_path is not available the I/O completes with -ENXIO. Otherwise
     * bdev_nvme_readv() is called, and bdev_nvme_io_complete() is invoked here only
     * when the resulting status is non-zero.
     */
    2925             : static void
    2926           1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
    2927             :                      bool success)
    2928             : {
    2929           1 :         struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    2930           1 :         struct spdk_bdev *bdev = bdev_io->bdev;
    2931             :         int ret;
    2932             : 
    2933           1 :         if (!success) {
    2934           0 :                 ret = -EINVAL;
    2935           0 :                 goto exit;
    2936             :         }
    2937             : 
    2938           1 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    2939           0 :                 ret = -ENXIO;
    2940           0 :                 goto exit;
    2941             :         }
    2942             : 
    2943           1 :         ret = bdev_nvme_readv(bio,
    2944             :                               bdev_io->u.bdev.iovs,
    2945             :                               bdev_io->u.bdev.iovcnt,
    2946             :                               bdev_io->u.bdev.md_buf,
    2947             :                               bdev_io->u.bdev.num_blocks,
    2948             :                               bdev_io->u.bdev.offset_blocks,
    2949             :                               bdev->dif_check_flags,
    2950             :                               bdev_io->u.bdev.memory_domain,
    2951             :                               bdev_io->u.bdev.memory_domain_ctx,
    2952             :                               bdev_io->u.bdev.accel_sequence);
    2953             : 
    2954           1 : exit:
    2955           1 :         if (spdk_unlikely(ret != 0)) {
    2956           0 :                 bdev_nvme_io_complete(bio, ret);
    2957             :         }
    2958           1 : }
    2959             : 
    /*
     * Dispatch a bdev I/O to the handler that matches bdev_io->type.
     *
     * Most cases call a helper that returns a status in rc; a non-zero rc makes the
     * I/O complete through bdev_nvme_io_complete() at the bottom of the function.
     * Four cases return without reaching that check:
     *   - RESET, NVME_ADMIN and ABORT set nbdev_io->io_path to NULL and call helpers
     *     that take nbdev_ch and return no status here.
     *   - FLUSH is completed on the spot with status 0.
     * A READ whose first iovec has no buffer requests one with
     * spdk_bdev_io_get_buf(); the read is then issued from bdev_nvme_get_buf_cb().
     * An unrecognized type completes with -EINVAL.
     *
     * \param nbdev_ch Channel passed to the reset, admin passthru and abort helpers.
     * \param bdev_io The I/O to dispatch; its driver_ctx is the struct nvme_bdev_io.
     */
    2960             : static inline void
    2961          51 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    2962             : {
    2963          51 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    2964          51 :         struct spdk_bdev *bdev = bdev_io->bdev;
    2965             :         struct nvme_bdev_io *nbdev_io_to_abort;
    2966          51 :         int rc = 0;
    2967             : 
    2968          51 :         switch (bdev_io->type) {
    2969           3 :         case SPDK_BDEV_IO_TYPE_READ:
    2970           3 :                 if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
    2971           2 :                         rc = bdev_nvme_readv(nbdev_io,
    2972             :                                              bdev_io->u.bdev.iovs,
    2973             :                                              bdev_io->u.bdev.iovcnt,
    2974             :                                              bdev_io->u.bdev.md_buf,
    2975             :                                              bdev_io->u.bdev.num_blocks,
    2976             :                                              bdev_io->u.bdev.offset_blocks,
    2977             :                                              bdev->dif_check_flags,
    2978             :                                              bdev_io->u.bdev.memory_domain,
    2979             :                                              bdev_io->u.bdev.memory_domain_ctx,
    2980             :                                              bdev_io->u.bdev.accel_sequence);
    2981             :                 } else {
                                               /* No data buffer yet: request one sized num_blocks * blocklen. */
    2982           1 :                         spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
    2983           1 :                                              bdev_io->u.bdev.num_blocks * bdev->blocklen);
    2984           1 :                         rc = 0;
    2985             :                 }
    2986           3 :                 break;
    2987          25 :         case SPDK_BDEV_IO_TYPE_WRITE:
    2988          25 :                 rc = bdev_nvme_writev(nbdev_io,
    2989             :                                       bdev_io->u.bdev.iovs,
    2990             :                                       bdev_io->u.bdev.iovcnt,
    2991             :                                       bdev_io->u.bdev.md_buf,
    2992             :                                       bdev_io->u.bdev.num_blocks,
    2993             :                                       bdev_io->u.bdev.offset_blocks,
    2994             :                                       bdev->dif_check_flags,
    2995             :                                       bdev_io->u.bdev.memory_domain,
    2996             :                                       bdev_io->u.bdev.memory_domain_ctx,
    2997             :                                       bdev_io->u.bdev.accel_sequence);
    2998          25 :                 break;
    2999           1 :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3000           1 :                 rc = bdev_nvme_comparev(nbdev_io,
    3001             :                                         bdev_io->u.bdev.iovs,
    3002             :                                         bdev_io->u.bdev.iovcnt,
    3003             :                                         bdev_io->u.bdev.md_buf,
    3004             :                                         bdev_io->u.bdev.num_blocks,
    3005             :                                         bdev_io->u.bdev.offset_blocks,
    3006             :                                         bdev->dif_check_flags);
    3007           1 :                 break;
    3008           2 :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3009           2 :                 rc = bdev_nvme_comparev_and_writev(nbdev_io,
    3010             :                                                    bdev_io->u.bdev.iovs,
    3011             :                                                    bdev_io->u.bdev.iovcnt,
    3012             :                                                    bdev_io->u.bdev.fused_iovs,
    3013             :                                                    bdev_io->u.bdev.fused_iovcnt,
    3014             :                                                    bdev_io->u.bdev.md_buf,
    3015             :                                                    bdev_io->u.bdev.num_blocks,
    3016             :                                                    bdev_io->u.bdev.offset_blocks,
    3017             :                                                    bdev->dif_check_flags);
    3018           2 :                 break;
    3019           1 :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3020           1 :                 rc = bdev_nvme_unmap(nbdev_io,
    3021             :                                      bdev_io->u.bdev.offset_blocks,
    3022             :                                      bdev_io->u.bdev.num_blocks);
    3023           1 :                 break;
    3024           0 :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3025           0 :                 rc =  bdev_nvme_write_zeroes(nbdev_io,
    3026             :                                              bdev_io->u.bdev.offset_blocks,
    3027             :                                              bdev_io->u.bdev.num_blocks);
    3028           0 :                 break;
    3029           7 :         case SPDK_BDEV_IO_TYPE_RESET:
    3030           7 :                 nbdev_io->io_path = NULL;
    3031           7 :                 bdev_nvme_reset_io(nbdev_ch, nbdev_io);
    3032           7 :                 return;
    3033             : 
    3034           1 :         case SPDK_BDEV_IO_TYPE_FLUSH:
                                       /* Flush is completed immediately with status 0; nothing else is called here. */
    3035           1 :                 bdev_nvme_io_complete(nbdev_io, 0);
    3036           1 :                 return;
    3037             : 
    3038           0 :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3039           0 :                 rc = bdev_nvme_zone_appendv(nbdev_io,
    3040             :                                             bdev_io->u.bdev.iovs,
    3041             :                                             bdev_io->u.bdev.iovcnt,
    3042             :                                             bdev_io->u.bdev.md_buf,
    3043             :                                             bdev_io->u.bdev.num_blocks,
    3044             :                                             bdev_io->u.bdev.offset_blocks,
    3045             :                                             bdev->dif_check_flags);
    3046           0 :                 break;
    3047           0 :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3048           0 :                 rc = bdev_nvme_get_zone_info(nbdev_io,
    3049             :                                              bdev_io->u.zone_mgmt.zone_id,
    3050             :                                              bdev_io->u.zone_mgmt.num_zones,
    3051           0 :                                              bdev_io->u.zone_mgmt.buf);
    3052           0 :                 break;
    3053           0 :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3054           0 :                 rc = bdev_nvme_zone_management(nbdev_io,
    3055             :                                                bdev_io->u.zone_mgmt.zone_id,
    3056             :                                                bdev_io->u.zone_mgmt.zone_action);
    3057           0 :                 break;
    3058           5 :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3059           5 :                 nbdev_io->io_path = NULL;
    3060           5 :                 bdev_nvme_admin_passthru(nbdev_ch,
    3061             :                                          nbdev_io,
    3062             :                                          &bdev_io->u.nvme_passthru.cmd,
    3063             :                                          bdev_io->u.nvme_passthru.buf,
    3064             :                                          bdev_io->u.nvme_passthru.nbytes);
    3065           5 :                 return;
    3066             : 
    3067           0 :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3068           0 :                 rc = bdev_nvme_io_passthru(nbdev_io,
    3069             :                                            &bdev_io->u.nvme_passthru.cmd,
    3070             :                                            bdev_io->u.nvme_passthru.buf,
    3071             :                                            bdev_io->u.nvme_passthru.nbytes);
    3072           0 :                 break;
    3073           0 :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3074           0 :                 rc = bdev_nvme_io_passthru_md(nbdev_io,
    3075             :                                               &bdev_io->u.nvme_passthru.cmd,
    3076             :                                               bdev_io->u.nvme_passthru.buf,
    3077             :                                               bdev_io->u.nvme_passthru.nbytes,
    3078             :                                               bdev_io->u.nvme_passthru.md_buf,
    3079             :                                               bdev_io->u.nvme_passthru.md_len);
    3080           0 :                 break;
    3081           0 :         case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
    3082           0 :                 rc = bdev_nvme_iov_passthru_md(nbdev_io,
    3083             :                                                &bdev_io->u.nvme_passthru.cmd,
    3084             :                                                bdev_io->u.nvme_passthru.iovs,
    3085             :                                                bdev_io->u.nvme_passthru.iovcnt,
    3086             :                                                bdev_io->u.nvme_passthru.nbytes,
    3087             :                                                bdev_io->u.nvme_passthru.md_buf,
    3088             :                                                bdev_io->u.nvme_passthru.md_len);
    3089           0 :                 break;
    3090           6 :         case SPDK_BDEV_IO_TYPE_ABORT:
    3091           6 :                 nbdev_io->io_path = NULL;
    3092           6 :                 nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
    3093           6 :                 bdev_nvme_abort(nbdev_ch,
    3094             :                                 nbdev_io,
    3095             :                                 nbdev_io_to_abort);
    3096           6 :                 return;
    3097             : 
    3098           0 :         case SPDK_BDEV_IO_TYPE_COPY:
    3099           0 :                 rc = bdev_nvme_copy(nbdev_io,
    3100             :                                     bdev_io->u.bdev.offset_blocks,
    3101             :                                     bdev_io->u.bdev.copy.src_offset_blocks,
    3102             :                                     bdev_io->u.bdev.num_blocks);
    3103           0 :                 break;
    3104           0 :         default:
    3105           0 :                 rc = -EINVAL;
    3106           0 :                 break;
    3107             :         }
    3108             : 
    3109          32 :         if (spdk_unlikely(rc != 0)) {
    3110           0 :                 bdev_nvme_io_complete(nbdev_io, rc);
    3111             :         }
    3112             : }
    3113             : 
    /*
     * Submit a bdev I/O on an NVMe bdev channel.
     *
     * Stamps nbdev_io->submit_tsc, records a TRACE_BDEV_NVME_IO_START trace point,
     * selects an I/O path with bdev_nvme_find_io_path() and dispatches through
     * _bdev_nvme_submit_request(). When no path is found, the I/O completes with
     * -ENXIO unless bdev_nvme_io_type_is_admin() accepts its type, in which case it
     * is dispatched with io_path == NULL.
     *
     * \param ch I/O channel whose context is the struct nvme_bdev_channel.
     * \param bdev_io The I/O to submit; its driver_ctx is the struct nvme_bdev_io.
     */
    3114             : static void
    3115          58 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
    3116             : {
    3117          58 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    3118          58 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3119             : 
    3120          58 :         if (spdk_likely(nbdev_io->submit_tsc == 0)) {
    3121          58 :                 nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
    3122             :         } else {
    3123             :                 /* There are cases where submit_tsc != 0, i.e. retry I/O.
    3124             :                  * We need to update submit_tsc here.
    3125             :                  */
    3126           0 :                 nbdev_io->submit_tsc = spdk_get_ticks();
    3127             :         }
    3128             : 
    3129          58 :         spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
    3130          58 :         nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
    3131          58 :         if (spdk_unlikely(!nbdev_io->io_path)) {
    3132          11 :                 if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
    3133          10 :                         bdev_nvme_io_complete(nbdev_io, -ENXIO);
    3134          10 :                         return;
    3135             :                 }
    3136             : 
    3137             :                 /* Admin commands do not use the optimal I/O path.
    3138             :                  * Simply fall through even if it is not found.
    3139             :                  */
    3140             :         }
    3141             : 
    3142          48 :         _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    3143             : }
    3144             : 
    /*
     * Report whether the NVMe bdev supports a given bdev I/O type.
     *
     * Only the first entry of nbdev->nvme_ns_list is consulted (asserted non-NULL).
     * If that entry has no ns attached, every type is reported as unsupported.
     *
     * \param ctx The struct nvme_bdev.
     * \param io_type bdev I/O type being queried.
     *
     * \return true for READ, WRITE, RESET, FLUSH, NVME_ADMIN, NVME_IO and ABORT;
     *         for the remaining listed types, the result of the namespace or
     *         controller capability checked in the matching case; false for any type
     *         without a case.
     *
     * NOTE(review): SPDK_BDEV_IO_TYPE_NVME_IOV_MD is dispatched by
     * _bdev_nvme_submit_request() but has no case in this switch, so it is reported
     * as unsupported - confirm that this is intended.
     */
    3145             : static bool
    3146           0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
    3147             : {
    3148           0 :         struct nvme_bdev *nbdev = ctx;
    3149             :         struct nvme_ns *nvme_ns;
    3150             :         struct spdk_nvme_ns *ns;
    3151             :         struct spdk_nvme_ctrlr *ctrlr;
    3152             :         const struct spdk_nvme_ctrlr_data *cdata;
    3153             : 
    3154           0 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    3155           0 :         assert(nvme_ns != NULL);
    3156           0 :         ns = nvme_ns->ns;
    3157           0 :         if (ns == NULL) {
    3158           0 :                 return false;
    3159             :         }
    3160             : 
    3161           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    3162             : 
    3163           0 :         switch (io_type) {
    3164           0 :         case SPDK_BDEV_IO_TYPE_READ:
    3165             :         case SPDK_BDEV_IO_TYPE_WRITE:
    3166             :         case SPDK_BDEV_IO_TYPE_RESET:
    3167             :         case SPDK_BDEV_IO_TYPE_FLUSH:
    3168             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3169             :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3170             :         case SPDK_BDEV_IO_TYPE_ABORT:
    3171           0 :                 return true;
    3172             : 
    3173           0 :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3174           0 :                 return spdk_nvme_ns_supports_compare(ns);
    3175             : 
    3176           0 :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3177           0 :                 return spdk_nvme_ns_get_md_size(ns) ? true : false;
    3178             : 
    3179           0 :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3180           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3181           0 :                 return cdata->oncs.dsm;
    3182             : 
    3183           0 :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3184           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3185           0 :                 return cdata->oncs.write_zeroes;
    3186             : 
    3187           0 :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3188           0 :                 if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    3189             :                     SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
    3190           0 :                         return true;
    3191             :                 }
    3192           0 :                 return false;
    3193             : 
    3194           0 :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3195             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3196           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
    3197             : 
    3198           0 :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3199           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
    3200           0 :                        spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
    3201             : 
    3202           0 :         case SPDK_BDEV_IO_TYPE_COPY:
    3203           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3204           0 :                 return cdata->oncs.copy;
    3205             : 
    3206           0 :         default:
    3207           0 :                 return false;
    3208             :         }
    3209             : }
    3210             : 
    /*
     * Allocate an nvme_qpair for a controller channel and register it with a poll group.
     *
     * The nvme_qpair is zero-allocated and linked to nvme_ctrlr and ctrlr_ch. An I/O
     * channel of g_nvme_bdev_ctrlrs is acquired and its context becomes
     * nvme_qpair->group. Unless the controller is disabled, bdev_nvme_create_qpair()
     * is then called. On success the nvme_qpair is appended to group->qpair_list,
     * stored in ctrlr_ch->qpair, and nvme_ctrlr->ref is incremented under
     * nvme_ctrlr->mutex.
     *
     * \return 0 on success, including the case where bdev_nvme_create_qpair() failed
     *         but both reconnect_delay_sec and bdev_retry_count are non-zero.
     * \return -1 when the allocation or spdk_get_io_channel() fails.
     * \return the bdev_nvme_create_qpair() error otherwise; the channel reference and
     *         the allocation are released first.
     */
    3211             : static int
    3212          57 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
    3213             : {
    3214             :         struct nvme_qpair *nvme_qpair;
    3215             :         struct spdk_io_channel *pg_ch;
    3216             :         int rc;
    3217             : 
    3218          57 :         nvme_qpair = calloc(1, sizeof(*nvme_qpair));
    3219          57 :         if (!nvme_qpair) {
    3220           0 :                 SPDK_ERRLOG("Failed to alloc nvme_qpair.\n");
    3221           0 :                 return -1;
    3222             :         }
    3223             : 
    3224          57 :         TAILQ_INIT(&nvme_qpair->io_path_list);
    3225             : 
    3226          57 :         nvme_qpair->ctrlr = nvme_ctrlr;
    3227          57 :         nvme_qpair->ctrlr_ch = ctrlr_ch;
    3228             : 
    3229          57 :         pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
    3230          57 :         if (!pg_ch) {
    3231           0 :                 free(nvme_qpair);
    3232           0 :                 return -1;
    3233             :         }
    3234             : 
    3235          57 :         nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
    3236             : 
    3237             : #ifdef SPDK_CONFIG_VTUNE
    3238             :         nvme_qpair->group->collect_spin_stat = true;
    3239             : #else
    3240          57 :         nvme_qpair->group->collect_spin_stat = false;
    3241             : #endif
    3242             : 
    3243          57 :         if (!nvme_ctrlr->disabled) {
    3244             :                 /* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will
    3245             :                  * be created when it's enabled.
    3246             :                  */
    3247          57 :                 rc = bdev_nvme_create_qpair(nvme_qpair);
    3248          57 :                 if (rc != 0) {
    3249             :                         /* nvme_ctrlr can't create IO qpair if connection is down.
    3250             :                          * If reconnect_delay_sec is non-zero, creating IO qpair is retried
    3251             :                          * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
    3252             :                          * submitted IO will be queued until IO qpair is successfully created.
    3253             :                          *
    3254             :                          * Hence, if both are satisfied, ignore the failure.
    3255             :                          */
    3256           0 :                         if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
    3257           0 :                                 spdk_put_io_channel(pg_ch);
    3258           0 :                                 free(nvme_qpair);
    3259           0 :                                 return rc;
    3260             :                         }
    3261             :                 }
    3262             :         }
    3263             : 
    3264          57 :         TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3265             : 
    3266          57 :         ctrlr_ch->qpair = nvme_qpair;
    3267             : 
    3268          57 :         pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
    3269          57 :         nvme_qpair->ctrlr->ref++;
    3270          57 :         pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);
    3271             : 
    3272          57 :         return 0;
    3273             : }
    3274             : 
    /*
     * Channel-create callback for an nvme_ctrlr io_device.
     *
     * Initializes the pending_resets list of the new channel context and creates its
     * nvme_qpair.
     *
     * \param io_device The struct nvme_ctrlr.
     * \param ctx_buf The struct nvme_ctrlr_channel being created.
     *
     * \return the result of nvme_qpair_create().
     */
    3275             : static int
    3276          57 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3277             : {
    3278          57 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
    3279          57 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3280             : 
    3281          57 :         TAILQ_INIT(&ctrlr_ch->pending_resets);
    3282             : 
    3283          57 :         return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
    3284             : }
    3285             : 
    /*
     * Tear down an nvme_qpair and free it.
     *
     * In order: every io_path on nvme_qpair->io_path_list is unlinked and freed with
     * nvme_io_path_free(); the nvme_qpair is removed from its group's qpair_list; the
     * I/O channel behind the group is put; nvme_ctrlr_release() is called for the
     * owning controller; finally the nvme_qpair memory is freed.
     * nvme_qpair->group must be non-NULL (asserted).
     */
    3286             : static void
    3287          57 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
    3288             : {
    3289             :         struct nvme_io_path *io_path, *next;
    3290             : 
    3291          57 :         assert(nvme_qpair->group != NULL);
    3292             : 
    3293          92 :         TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
    3294          35 :                 TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
    3295          35 :                 nvme_io_path_free(io_path);
    3296             :         }
    3297             : 
    3298          57 :         TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3299             : 
    3300          57 :         spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
    3301             : 
    3302          57 :         nvme_ctrlr_release(nvme_qpair->ctrlr);
    3303             : 
    3304          57 :         free(nvme_qpair);
    3305          57 : }
    3306             : 
    /*
     * Channel-destroy callback for an nvme_ctrlr io_device.
     *
     * Clears the I/O path cache for the channel's nvme_qpair. If the nvme_qpair still
     * has an spdk qpair, it is either disconnected (no reset iteration in flight) or
     * the in-flight reset iteration is continued, and the nvme_qpair is only detached
     * from the channel by clearing nvme_qpair->ctrlr_ch. If there is no spdk qpair,
     * the nvme_qpair is deleted immediately with nvme_qpair_delete().
     *
     * \param io_device Not used in this function.
     * \param ctx_buf The struct nvme_ctrlr_channel being destroyed.
     */
    3307             : static void
    3308          57 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3309             : {
    3310          57 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3311             :         struct nvme_qpair *nvme_qpair;
    3312             : 
    3313          57 :         nvme_qpair = ctrlr_ch->qpair;
    3314          57 :         assert(nvme_qpair != NULL);
    3315             : 
    3316          57 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    3317             : 
    3318          57 :         if (nvme_qpair->qpair != NULL) {
    3319          43 :                 if (ctrlr_ch->reset_iter == NULL) {
    3320          43 :                         spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
    3321             :                 } else {
    3322             :                         /* Skip current ctrlr_channel in a full reset sequence because
    3323             :                          * it is being deleted now. The qpair is already being disconnected.
    3324             :                          * We do not have to restart disconnecting it.
    3325             :                          */
    3326           0 :                         spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    3327             :                 }
    3328             : 
    3329             :                 /* We cannot release a reference to the poll group now.
    3330             :                  * The qpair may be disconnected asynchronously later.
    3331             :                  * We need to poll it until it is actually disconnected.
    3332             :                  * Just detach the qpair from the deleting ctrlr_channel.
    3333             :                  */
    3334          43 :                 nvme_qpair->ctrlr_ch = NULL;
    3335             :         } else {
    3336          14 :                 assert(ctrlr_ch->reset_iter == NULL);
    3337             : 
    3338          14 :                 nvme_qpair_delete(nvme_qpair);
    3339             :         }
    3340          57 : }
    3341             : 
    /*
     * Return the accel I/O channel of a poll group, acquiring it on first use.
     *
     * When group->accel_channel is not set, it is obtained with
     * spdk_accel_get_io_channel() and stored in the group for later calls.
     *
     * \return the channel, or NULL (after logging an error) if it cannot be obtained.
     */
    3342             : static inline struct spdk_io_channel *
    3343           0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
    3344             : {
    3345           0 :         if (spdk_unlikely(!group->accel_channel)) {
    3346           0 :                 group->accel_channel = spdk_accel_get_io_channel();
    3347           0 :                 if (!group->accel_channel) {
    3348           0 :                         SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
    3349             :                                     group);
    3350           0 :                         return NULL;
    3351             :                 }
    3352             :         }
    3353             : 
    3354           0 :         return group->accel_channel;
    3355             : }
    3356             : 
    /*
     * Submit a crc32c computation over an iovec array to the accel framework.
     *
     * \param ctx The struct nvme_poll_group whose accel channel is used.
     * \param dst, iov, iov_cnt, seed Forwarded unchanged to spdk_accel_submit_crc32cv().
     * \param cb_fn Completion callback; must not be NULL (asserted).
     * \param cb_arg Argument passed to cb_fn.
     *
     * If no accel channel can be obtained, cb_fn is called with -ENOMEM. If the
     * submission returns -ENOMEM or -EINVAL, cb_fn is called here with that code.
     * Any non-zero submission result is logged; for codes other than those two this
     * function does not call cb_fn itself.
     */
    3357             : static void
    3358           0 : bdev_nvme_submit_accel_crc32c(void *ctx, uint32_t *dst, struct iovec *iov,
    3359             :                               uint32_t iov_cnt, uint32_t seed,
    3360             :                               spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
    3361             : {
    3362             :         struct spdk_io_channel *accel_ch;
    3363           0 :         struct nvme_poll_group *group = ctx;
    3364             :         int rc;
    3365             : 
    3366           0 :         assert(cb_fn != NULL);
    3367             : 
    3368           0 :         accel_ch = bdev_nvme_get_accel_channel(group);
    3369           0 :         if (spdk_unlikely(accel_ch == NULL)) {
    3370           0 :                 cb_fn(cb_arg, -ENOMEM);
    3371           0 :                 return;
    3372             :         }
    3373             : 
    3374           0 :         rc = spdk_accel_submit_crc32cv(accel_ch, dst, iov, iov_cnt, seed, cb_fn, cb_arg);
    3375           0 :         if (rc) {
    3376             :                 /* For the two cases, spdk_accel_submit_crc32cv does not call the user's cb_fn */
    3377           0 :                 if (rc == -ENOMEM || rc == -EINVAL) {
    3378           0 :                         cb_fn(cb_arg, rc);
    3379             :                 }
    3380           0 :                 SPDK_ERRLOG("Cannot complete the accelerated crc32c operation with iov=%p\n", iov);
    3381             :         }
    3382             : }
    3383             : 
    3384             : static void
    3385           0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
    3386             : {
    3387           0 :         spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
    3388           0 : }
    3389             : 
    3390             : static void
    3391           0 : bdev_nvme_abort_sequence(void *seq)
    3392             : {
    3393           0 :         spdk_accel_sequence_abort(seq);
    3394           0 : }
    3395             : 
    3396             : static void
    3397           0 : bdev_nvme_reverse_sequence(void *seq)
    3398             : {
    3399           0 :         spdk_accel_sequence_reverse(seq);
    3400           0 : }
    3401             : 
    3402             : static int
    3403           0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
    3404             :                         struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
    3405             :                         spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
    3406             : {
    3407             :         struct spdk_io_channel *ch;
    3408           0 :         struct nvme_poll_group *group = ctx;
    3409             : 
    3410           0 :         ch = bdev_nvme_get_accel_channel(group);
    3411           0 :         if (spdk_unlikely(ch == NULL)) {
    3412           0 :                 return -ENOMEM;
    3413             :         }
    3414             : 
    3415           0 :         return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
    3416             :                                         domain, domain_ctx, seed, cb_fn, cb_arg);
    3417             : }
    3418             : 
    3419             : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
    3420             :         .table_size             = sizeof(struct spdk_nvme_accel_fn_table),
    3421             :         .submit_accel_crc32c    = bdev_nvme_submit_accel_crc32c,
    3422             :         .append_crc32c          = bdev_nvme_append_crc32c,
    3423             :         .finish_sequence        = bdev_nvme_finish_sequence,
    3424             :         .reverse_sequence       = bdev_nvme_reverse_sequence,
    3425             :         .abort_sequence         = bdev_nvme_abort_sequence,
    3426             : };
    3427             : 
    3428             : static int
    3429          42 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
    3430             : {
    3431          42 :         struct nvme_poll_group *group = ctx_buf;
    3432             : 
    3433          42 :         TAILQ_INIT(&group->qpair_list);
    3434             : 
    3435          42 :         group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
    3436          42 :         if (group->group == NULL) {
    3437           0 :                 return -1;
    3438             :         }
    3439             : 
    3440          42 :         group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
    3441             : 
    3442          42 :         if (group->poller == NULL) {
    3443           0 :                 spdk_nvme_poll_group_destroy(group->group);
    3444           0 :                 return -1;
    3445             :         }
    3446             : 
    3447          42 :         return 0;
    3448             : }
    3449             : 
    3450             : static void
    3451          42 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
    3452             : {
    3453          42 :         struct nvme_poll_group *group = ctx_buf;
    3454             : 
    3455          42 :         assert(TAILQ_EMPTY(&group->qpair_list));
    3456             : 
    3457          42 :         if (group->accel_channel) {
    3458           0 :                 spdk_put_io_channel(group->accel_channel);
    3459             :         }
    3460             : 
    3461          42 :         spdk_poller_unregister(&group->poller);
    3462          42 :         if (spdk_nvme_poll_group_destroy(group->group)) {
    3463           0 :                 SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
    3464           0 :                 assert(false);
    3465             :         }
    3466          42 : }
    3467             : 
    3468             : static struct spdk_io_channel *
    3469           0 : bdev_nvme_get_io_channel(void *ctx)
    3470             : {
    3471           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3472             : 
    3473           0 :         return spdk_get_io_channel(nvme_bdev);
    3474             : }
    3475             : 
    3476             : static void *
    3477           0 : bdev_nvme_get_module_ctx(void *ctx)
    3478             : {
    3479           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3480             :         struct nvme_ns *nvme_ns;
    3481             : 
    3482           0 :         if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
    3483           0 :                 return NULL;
    3484             :         }
    3485             : 
    3486           0 :         nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
    3487           0 :         if (!nvme_ns) {
    3488           0 :                 return NULL;
    3489             :         }
    3490             : 
    3491           0 :         return nvme_ns->ns;
    3492             : }
    3493             : 
    3494             : static const char *
    3495           0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
    3496             : {
    3497           0 :         switch (ana_state) {
    3498           0 :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    3499           0 :                 return "optimized";
    3500           0 :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    3501           0 :                 return "non_optimized";
    3502           0 :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    3503           0 :                 return "inaccessible";
    3504           0 :         case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
    3505           0 :                 return "persistent_loss";
    3506           0 :         case SPDK_NVME_ANA_CHANGE_STATE:
    3507           0 :                 return "change";
    3508           0 :         default:
    3509           0 :                 return NULL;
    3510             :         }
    3511             : }
    3512             : 
    3513             : static int
    3514           8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
    3515             : {
    3516           8 :         struct spdk_memory_domain **_domains = NULL;
    3517           8 :         struct nvme_bdev *nbdev = ctx;
    3518             :         struct nvme_ns *nvme_ns;
    3519           8 :         int i = 0, _array_size = array_size;
    3520           8 :         int rc = 0;
    3521             : 
    3522          22 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    3523          14 :                 if (domains && array_size >= i) {
    3524          11 :                         _domains = &domains[i];
    3525             :                 } else {
    3526           3 :                         _domains = NULL;
    3527             :                 }
    3528          14 :                 rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
    3529          14 :                 if (rc > 0) {
    3530          13 :                         i += rc;
    3531          13 :                         if (_array_size >= rc) {
    3532           9 :                                 _array_size -= rc;
    3533             :                         } else {
    3534           4 :                                 _array_size = 0;
    3535             :                         }
    3536           1 :                 } else if (rc < 0) {
    3537           0 :                         return rc;
    3538             :                 }
    3539             :         }
    3540             : 
    3541           8 :         return i;
    3542             : }
    3543             : 
    3544             : static const char *
    3545           0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
    3546             : {
    3547           0 :         if (nvme_ctrlr->destruct) {
    3548           0 :                 return "deleting";
    3549           0 :         } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    3550           0 :                 return "failed";
    3551           0 :         } else if (nvme_ctrlr->resetting) {
    3552           0 :                 return "resetting";
    3553           0 :         } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
    3554           0 :                 return "reconnect_is_delayed";
    3555           0 :         } else if (nvme_ctrlr->disabled) {
    3556           0 :                 return "disabled";
    3557             :         } else {
    3558           0 :                 return "enabled";
    3559             :         }
    3560             : }
    3561             : 
    3562             : void
    3563           0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
    3564           0 : {
    3565             :         struct spdk_nvme_transport_id *trid;
    3566             :         const struct spdk_nvme_ctrlr_opts *opts;
    3567             :         const struct spdk_nvme_ctrlr_data *cdata;
    3568             :         struct nvme_path_id *path_id;
    3569             : 
    3570           0 :         spdk_json_write_object_begin(w);
    3571             : 
    3572           0 :         spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
    3573             : 
    3574             : #ifdef SPDK_CONFIG_NVME_CUSE
    3575           0 :         size_t cuse_name_size = 128;
    3576           0 :         char cuse_name[cuse_name_size];
    3577             : 
    3578           0 :         int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
    3579           0 :         if (rc == 0) {
    3580           0 :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    3581             :         }
    3582             : #endif
    3583           0 :         trid = &nvme_ctrlr->active_path_id->trid;
    3584           0 :         spdk_json_write_named_object_begin(w, "trid");
    3585           0 :         nvme_bdev_dump_trid_json(trid, w);
    3586           0 :         spdk_json_write_object_end(w);
    3587             : 
    3588           0 :         path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
    3589           0 :         if (path_id != NULL) {
    3590           0 :                 spdk_json_write_named_array_begin(w, "alternate_trids");
    3591             :                 do {
    3592           0 :                         trid = &path_id->trid;
    3593           0 :                         spdk_json_write_object_begin(w);
    3594           0 :                         nvme_bdev_dump_trid_json(trid, w);
    3595           0 :                         spdk_json_write_object_end(w);
    3596             : 
    3597           0 :                         path_id = TAILQ_NEXT(path_id, link);
    3598           0 :                 } while (path_id != NULL);
    3599           0 :                 spdk_json_write_array_end(w);
    3600             :         }
    3601             : 
    3602           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    3603           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    3604             : 
    3605           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    3606           0 :         spdk_json_write_named_object_begin(w, "host");
    3607           0 :         spdk_json_write_named_string(w, "nqn", opts->hostnqn);
    3608           0 :         spdk_json_write_named_string(w, "addr", opts->src_addr);
    3609           0 :         spdk_json_write_named_string(w, "svcid", opts->src_svcid);
    3610           0 :         spdk_json_write_object_end(w);
    3611             : 
    3612           0 :         spdk_json_write_object_end(w);
    3613           0 : }
    3614             : 
    3615             : static void
    3616           0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
    3617             :                          struct nvme_ns *nvme_ns)
    3618           0 : {
    3619             :         struct spdk_nvme_ns *ns;
    3620             :         struct spdk_nvme_ctrlr *ctrlr;
    3621             :         const struct spdk_nvme_ctrlr_data *cdata;
    3622             :         const struct spdk_nvme_transport_id *trid;
    3623             :         union spdk_nvme_vs_register vs;
    3624             :         const struct spdk_nvme_ns_data *nsdata;
    3625           0 :         char buf[128];
    3626             : 
    3627           0 :         ns = nvme_ns->ns;
    3628           0 :         if (ns == NULL) {
    3629           0 :                 return;
    3630             :         }
    3631             : 
    3632           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    3633             : 
    3634           0 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3635           0 :         trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
    3636           0 :         vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
    3637             : 
    3638           0 :         spdk_json_write_object_begin(w);
    3639             : 
    3640           0 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    3641           0 :                 spdk_json_write_named_string(w, "pci_address", trid->traddr);
    3642             :         }
    3643             : 
    3644           0 :         spdk_json_write_named_object_begin(w, "trid");
    3645             : 
    3646           0 :         nvme_bdev_dump_trid_json(trid, w);
    3647             : 
    3648           0 :         spdk_json_write_object_end(w);
    3649             : 
    3650             : #ifdef SPDK_CONFIG_NVME_CUSE
    3651           0 :         size_t cuse_name_size = 128;
    3652           0 :         char cuse_name[cuse_name_size];
    3653             : 
    3654           0 :         int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
    3655             :                                             cuse_name, &cuse_name_size);
    3656           0 :         if (rc == 0) {
    3657           0 :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    3658             :         }
    3659             : #endif
    3660             : 
    3661           0 :         spdk_json_write_named_object_begin(w, "ctrlr_data");
    3662             : 
    3663           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    3664             : 
    3665           0 :         spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
    3666             : 
    3667           0 :         snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
    3668           0 :         spdk_str_trim(buf);
    3669           0 :         spdk_json_write_named_string(w, "model_number", buf);
    3670             : 
    3671           0 :         snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
    3672           0 :         spdk_str_trim(buf);
    3673           0 :         spdk_json_write_named_string(w, "serial_number", buf);
    3674             : 
    3675           0 :         snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
    3676           0 :         spdk_str_trim(buf);
    3677           0 :         spdk_json_write_named_string(w, "firmware_revision", buf);
    3678             : 
    3679           0 :         if (cdata->subnqn[0] != '\0') {
    3680           0 :                 spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
    3681             :         }
    3682             : 
    3683           0 :         spdk_json_write_named_object_begin(w, "oacs");
    3684             : 
    3685           0 :         spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
    3686           0 :         spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
    3687           0 :         spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
    3688           0 :         spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
    3689             : 
    3690           0 :         spdk_json_write_object_end(w);
    3691             : 
    3692           0 :         spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
    3693           0 :         spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
    3694             : 
    3695           0 :         spdk_json_write_object_end(w);
    3696             : 
    3697           0 :         spdk_json_write_named_object_begin(w, "vs");
    3698             : 
    3699           0 :         spdk_json_write_name(w, "nvme_version");
    3700           0 :         if (vs.bits.ter) {
    3701           0 :                 spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
    3702             :         } else {
    3703           0 :                 spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
    3704             :         }
    3705             : 
    3706           0 :         spdk_json_write_object_end(w);
    3707             : 
    3708           0 :         nsdata = spdk_nvme_ns_get_data(ns);
    3709             : 
    3710           0 :         spdk_json_write_named_object_begin(w, "ns_data");
    3711             : 
    3712           0 :         spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
    3713             : 
    3714           0 :         if (cdata->cmic.ana_reporting) {
    3715           0 :                 spdk_json_write_named_string(w, "ana_state",
    3716             :                                              _nvme_ana_state_str(nvme_ns->ana_state));
    3717             :         }
    3718             : 
    3719           0 :         spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
    3720             : 
    3721           0 :         spdk_json_write_object_end(w);
    3722             : 
    3723           0 :         if (cdata->oacs.security) {
    3724           0 :                 spdk_json_write_named_object_begin(w, "security");
    3725             : 
    3726           0 :                 spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
    3727             : 
    3728           0 :                 spdk_json_write_object_end(w);
    3729             :         }
    3730             : 
    3731           0 :         spdk_json_write_object_end(w);
    3732             : }
    3733             : 
    3734             : static const char *
    3735           0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
    3736             : {
    3737           0 :         switch (nbdev->mp_policy) {
    3738           0 :         case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
    3739           0 :                 return "active_passive";
    3740           0 :         case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
    3741           0 :                 return "active_active";
    3742           0 :         default:
    3743           0 :                 assert(false);
    3744             :                 return "invalid";
    3745             :         }
    3746             : }
    3747             : 
    3748             : static int
    3749           0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
    3750             : {
    3751           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3752             :         struct nvme_ns *nvme_ns;
    3753             : 
    3754           0 :         pthread_mutex_lock(&nvme_bdev->mutex);
    3755           0 :         spdk_json_write_named_array_begin(w, "nvme");
    3756           0 :         TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
    3757           0 :                 nvme_namespace_info_json(w, nvme_ns);
    3758             :         }
    3759           0 :         spdk_json_write_array_end(w);
    3760           0 :         spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
    3761           0 :         pthread_mutex_unlock(&nvme_bdev->mutex);
    3762             : 
    3763           0 :         return 0;
    3764             : }
    3765             : 
    3766             : static void
    3767           0 : bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
    3768             : {
    3769             :         /* No config per bdev needed */
    3770           0 : }
    3771             : 
    3772             : static uint64_t
    3773           0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
    3774             : {
    3775           0 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    3776             :         struct nvme_io_path *io_path;
    3777             :         struct nvme_poll_group *group;
    3778           0 :         uint64_t spin_time = 0;
    3779             : 
    3780           0 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    3781           0 :                 group = io_path->qpair->group;
    3782             : 
    3783           0 :                 if (!group || !group->collect_spin_stat) {
    3784           0 :                         continue;
    3785             :                 }
    3786             : 
    3787           0 :                 if (group->end_ticks != 0) {
    3788           0 :                         group->spin_ticks += (group->end_ticks - group->start_ticks);
    3789           0 :                         group->end_ticks = 0;
    3790             :                 }
    3791             : 
    3792           0 :                 spin_time += group->spin_ticks;
    3793           0 :                 group->start_ticks = 0;
    3794           0 :                 group->spin_ticks = 0;
    3795             :         }
    3796             : 
    3797           0 :         return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
    3798             : }
    3799             : 
    3800             : static void
    3801           0 : bdev_nvme_reset_device_stat(void *ctx)
    3802             : {
    3803           0 :         struct nvme_bdev *nbdev = ctx;
    3804             : 
    3805           0 :         if (nbdev->err_stat != NULL) {
    3806           0 :                 memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
    3807             :         }
    3808           0 : }
    3809             : 
    3810             : /* JSON string should be lowercases and underscore delimited string. */
    3811             : static void
    3812           0 : bdev_nvme_format_nvme_status(char *dst, const char *src)
    3813             : {
    3814           0 :         char tmp[256];
    3815             : 
    3816           0 :         spdk_strcpy_replace(dst, 256, src, " - ", "_");
    3817           0 :         spdk_strcpy_replace(tmp, 256, dst, "-", "_");
    3818           0 :         spdk_strcpy_replace(dst, 256, tmp, " ", "_");
    3819           0 :         spdk_strlwr(dst);
    3820           0 : }
    3821             : 
    3822             : static void
    3823           0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
    3824             : {
    3825           0 :         struct nvme_bdev *nbdev = ctx;
    3826           0 :         struct spdk_nvme_status status = {};
    3827             :         uint16_t sct, sc;
    3828           0 :         char status_json[256];
    3829             :         const char *status_str;
    3830             : 
    3831           0 :         if (nbdev->err_stat == NULL) {
    3832           0 :                 return;
    3833             :         }
    3834             : 
    3835           0 :         spdk_json_write_named_object_begin(w, "nvme_error");
    3836             : 
    3837           0 :         spdk_json_write_named_object_begin(w, "status_type");
    3838           0 :         for (sct = 0; sct < 8; sct++) {
    3839           0 :                 if (nbdev->err_stat->status_type[sct] == 0) {
    3840           0 :                         continue;
    3841             :                 }
    3842           0 :                 status.sct = sct;
    3843             : 
    3844           0 :                 status_str = spdk_nvme_cpl_get_status_type_string(&status);
    3845           0 :                 assert(status_str != NULL);
    3846           0 :                 bdev_nvme_format_nvme_status(status_json, status_str);
    3847             : 
    3848           0 :                 spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
    3849             :         }
    3850           0 :         spdk_json_write_object_end(w);
    3851             : 
    3852           0 :         spdk_json_write_named_object_begin(w, "status_code");
    3853           0 :         for (sct = 0; sct < 4; sct++) {
    3854           0 :                 status.sct = sct;
    3855           0 :                 for (sc = 0; sc < 256; sc++) {
    3856           0 :                         if (nbdev->err_stat->status[sct][sc] == 0) {
    3857           0 :                                 continue;
    3858             :                         }
    3859           0 :                         status.sc = sc;
    3860             : 
    3861           0 :                         status_str = spdk_nvme_cpl_get_status_string(&status);
    3862           0 :                         assert(status_str != NULL);
    3863           0 :                         bdev_nvme_format_nvme_status(status_json, status_str);
    3864             : 
    3865           0 :                         spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
    3866             :                 }
    3867             :         }
    3868           0 :         spdk_json_write_object_end(w);
    3869             : 
    3870           0 :         spdk_json_write_object_end(w);
    3871             : }
    3872             : 
    3873             : static bool
    3874           0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
    3875             : {
    3876           0 :         struct nvme_bdev *nbdev = ctx;
    3877             :         struct spdk_nvme_ctrlr *ctrlr;
    3878             : 
    3879           0 :         if (!g_opts.allow_accel_sequence) {
    3880           0 :                 return false;
    3881             :         }
    3882             : 
    3883           0 :         switch (type) {
    3884           0 :         case SPDK_BDEV_IO_TYPE_WRITE:
    3885             :         case SPDK_BDEV_IO_TYPE_READ:
    3886           0 :                 break;
    3887           0 :         default:
    3888           0 :                 return false;
    3889             :         }
    3890             : 
    3891           0 :         ctrlr = bdev_nvme_get_ctrlr(&nbdev->disk);
    3892           0 :         assert(ctrlr != NULL);
    3893             : 
    3894           0 :         return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
    3895             : }
    3896             : 
    3897             : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
    3898             :         .destruct                       = bdev_nvme_destruct,
    3899             :         .submit_request                 = bdev_nvme_submit_request,
    3900             :         .io_type_supported              = bdev_nvme_io_type_supported,
    3901             :         .get_io_channel                 = bdev_nvme_get_io_channel,
    3902             :         .dump_info_json                 = bdev_nvme_dump_info_json,
    3903             :         .write_config_json              = bdev_nvme_write_config_json,
    3904             :         .get_spin_time                  = bdev_nvme_get_spin_time,
    3905             :         .get_module_ctx                 = bdev_nvme_get_module_ctx,
    3906             :         .get_memory_domains             = bdev_nvme_get_memory_domains,
    3907             :         .accel_sequence_supported       = bdev_nvme_accel_sequence_supported,
    3908             :         .reset_device_stat              = bdev_nvme_reset_device_stat,
    3909             :         .dump_device_stat_json          = bdev_nvme_dump_device_stat_json,
    3910             : };
    3911             : 
    3912             : typedef int (*bdev_nvme_parse_ana_log_page_cb)(
    3913             :         const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);
    3914             : 
    3915             : static int
    3916          40 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    3917             :                              bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
    3918             : {
    3919             :         struct spdk_nvme_ana_group_descriptor *copied_desc;
    3920             :         uint8_t *orig_desc;
    3921             :         uint32_t i, desc_size, copy_len;
    3922          40 :         int rc = 0;
    3923             : 
    3924          40 :         if (nvme_ctrlr->ana_log_page == NULL) {
    3925           0 :                 return -EINVAL;
    3926             :         }
    3927             : 
    3928          40 :         copied_desc = nvme_ctrlr->copied_ana_desc;
    3929             : 
    3930          40 :         orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
    3931          40 :         copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
    3932             : 
    3933          69 :         for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
    3934          65 :                 memcpy(copied_desc, orig_desc, copy_len);
    3935             : 
    3936          65 :                 rc = cb_fn(copied_desc, cb_arg);
    3937          65 :                 if (rc != 0) {
    3938          36 :                         break;
    3939             :                 }
    3940             : 
    3941          29 :                 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
    3942          29 :                             copied_desc->num_of_nsid * sizeof(uint32_t);
    3943          29 :                 orig_desc += desc_size;
    3944          29 :                 copy_len -= desc_size;
    3945             :         }
    3946             : 
    3947          40 :         return rc;
    3948             : }
    3949             : 
    3950             : static int
    3951           5 : nvme_ns_ana_transition_timedout(void *ctx)
    3952             : {
    3953           5 :         struct nvme_ns *nvme_ns = ctx;
    3954             : 
    3955           5 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    3956           5 :         nvme_ns->ana_transition_timedout = true;
    3957             : 
    3958           5 :         return SPDK_POLLER_BUSY;
    3959             : }
    3960             : 
    3961             : static void
    3962          45 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
    3963             :                        const struct spdk_nvme_ana_group_descriptor *desc)
    3964             : {
    3965             :         const struct spdk_nvme_ctrlr_data *cdata;
    3966             : 
    3967          45 :         nvme_ns->ana_group_id = desc->ana_group_id;
    3968          45 :         nvme_ns->ana_state = desc->ana_state;
    3969          45 :         nvme_ns->ana_state_updating = false;
    3970             : 
    3971          45 :         switch (nvme_ns->ana_state) {
    3972          38 :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    3973             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    3974          38 :                 nvme_ns->ana_transition_timedout = false;
    3975          38 :                 spdk_poller_unregister(&nvme_ns->anatt_timer);
    3976          38 :                 break;
    3977             : 
    3978           6 :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    3979             :         case SPDK_NVME_ANA_CHANGE_STATE:
    3980           6 :                 if (nvme_ns->anatt_timer != NULL) {
    3981           1 :                         break;
    3982             :                 }
    3983             : 
    3984           5 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    3985           5 :                 nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
    3986             :                                        nvme_ns,
    3987             :                                        cdata->anatt * SPDK_SEC_TO_USEC);
    3988           5 :                 break;
    3989           1 :         default:
    3990           1 :                 break;
    3991             :         }
    3992          45 : }
    3993             : 
    3994             : static int
    3995          59 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
    3996             : {
    3997          59 :         struct nvme_ns *nvme_ns = cb_arg;
    3998             :         uint32_t i;
    3999             : 
    4000          59 :         assert(nvme_ns->ns != NULL);
    4001             : 
    4002          81 :         for (i = 0; i < desc->num_of_nsid; i++) {
    4003          58 :                 if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
    4004          22 :                         continue;
    4005             :                 }
    4006             : 
    4007          36 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    4008          36 :                 return 1;
    4009             :         }
    4010             : 
    4011          23 :         return 0;
    4012             : }
    4013             : 
    4014             : static struct spdk_uuid
    4015           5 : nvme_generate_uuid(const char *sn, uint32_t nsid)
    4016             : {
    4017           5 :         struct spdk_uuid new_uuid, namespace_uuid;
    4018           5 :         char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
    4019             :         /* This namespace UUID was generated using uuid_generate() method. */
    4020           5 :         const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
    4021             :         int size;
    4022             : 
    4023           5 :         assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
    4024             : 
    4025           5 :         spdk_uuid_set_null(&new_uuid);
    4026           5 :         spdk_uuid_set_null(&namespace_uuid);
    4027             : 
    4028           5 :         size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
    4029           5 :         assert(size > 0 && (unsigned long)size < sizeof(merged_str));
    4030             : 
    4031           5 :         spdk_uuid_parse(&namespace_uuid, namespace_str);
    4032             : 
    4033           5 :         spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
    4034             : 
    4035           5 :         return new_uuid;
    4036             : }
    4037             : 
    4038             : static int
    4039          37 : nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
    4040             :                  struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
    4041             :                  uint32_t prchk_flags, void *ctx)
    4042             : {
    4043             :         const struct spdk_uuid          *uuid;
    4044             :         const uint8_t *nguid;
    4045             :         const struct spdk_nvme_ctrlr_data *cdata;
    4046             :         const struct spdk_nvme_ns_data  *nsdata;
    4047             :         const struct spdk_nvme_ctrlr_opts *opts;
    4048             :         enum spdk_nvme_csi              csi;
    4049             :         uint32_t atomic_bs, phys_bs, bs;
    4050          37 :         char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
    4051             : 
    4052          37 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    4053          37 :         csi = spdk_nvme_ns_get_csi(ns);
    4054          37 :         opts = spdk_nvme_ctrlr_get_opts(ctrlr);
    4055             : 
    4056          37 :         switch (csi) {
    4057          37 :         case SPDK_NVME_CSI_NVM:
    4058          37 :                 disk->product_name = "NVMe disk";
    4059          37 :                 break;
    4060           0 :         case SPDK_NVME_CSI_ZNS:
    4061           0 :                 disk->product_name = "NVMe ZNS disk";
    4062           0 :                 disk->zoned = true;
    4063           0 :                 disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    4064           0 :                 disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
    4065           0 :                                              spdk_nvme_ns_get_extended_sector_size(ns);
    4066           0 :                 disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
    4067           0 :                 disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
    4068           0 :                 break;
    4069           0 :         default:
    4070           0 :                 SPDK_ERRLOG("unsupported CSI: %u\n", csi);
    4071           0 :                 return -ENOTSUP;
    4072             :         }
    4073             : 
    4074          37 :         disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
    4075          37 :         if (!disk->name) {
    4076           0 :                 return -ENOMEM;
    4077             :         }
    4078             : 
    4079          37 :         disk->write_cache = 0;
    4080          37 :         if (cdata->vwc.present) {
    4081             :                 /* Enable if the Volatile Write Cache exists */
    4082           0 :                 disk->write_cache = 1;
    4083             :         }
    4084          37 :         if (cdata->oncs.write_zeroes) {
    4085           0 :                 disk->max_write_zeroes = UINT16_MAX + 1;
    4086             :         }
    4087          37 :         disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
    4088          37 :         disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
    4089          37 :         disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
    4090             :         /* NVMe driver will split one request into multiple requests
    4091             :          * based on MDTS and stripe boundary, the bdev layer will use
    4092             :          * max_segment_size and max_num_segments to split one big IO
    4093             :          * into multiple requests, then small request can't run out
    4094             :          * of NVMe internal requests data structure.
    4095             :          */
    4096          37 :         if (opts && opts->io_queue_requests) {
    4097           0 :                 disk->max_num_segments = opts->io_queue_requests / 2;
    4098             :         }
    4099          37 :         disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
    4100             : 
    4101          37 :         nguid = spdk_nvme_ns_get_nguid(ns);
    4102          37 :         if (!nguid) {
    4103          37 :                 uuid = spdk_nvme_ns_get_uuid(ns);
    4104          37 :                 if (uuid) {
    4105          12 :                         disk->uuid = *uuid;
    4106          25 :                 } else if (g_opts.generate_uuids) {
    4107           0 :                         spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
    4108           0 :                         disk->uuid = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns));
    4109             :                 }
    4110             :         } else {
    4111           0 :                 memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
    4112             :         }
    4113             : 
    4114          37 :         nsdata = spdk_nvme_ns_get_data(ns);
    4115          37 :         bs = spdk_nvme_ns_get_sector_size(ns);
    4116          37 :         atomic_bs = bs;
    4117          37 :         phys_bs = bs;
    4118          37 :         if (nsdata->nabo == 0) {
    4119          37 :                 if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
    4120           0 :                         atomic_bs = bs * (1 + nsdata->nawupf);
    4121             :                 } else {
    4122          37 :                         atomic_bs = bs * (1 + cdata->awupf);
    4123             :                 }
    4124             :         }
    4125          37 :         if (nsdata->nsfeat.optperf) {
    4126           0 :                 phys_bs = bs * (1 + nsdata->npwg);
    4127             :         }
    4128          37 :         disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
    4129             : 
    4130          37 :         disk->md_len = spdk_nvme_ns_get_md_size(ns);
    4131          37 :         if (disk->md_len != 0) {
    4132           0 :                 disk->md_interleave = nsdata->flbas.extended;
    4133           0 :                 disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
    4134           0 :                 if (disk->dif_type != SPDK_DIF_DISABLE) {
    4135           0 :                         disk->dif_is_head_of_md = nsdata->dps.md_start;
    4136           0 :                         disk->dif_check_flags = prchk_flags;
    4137             :                 }
    4138             :         }
    4139             : 
    4140          37 :         if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
    4141             :               SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
    4142          37 :                 disk->acwu = 0;
    4143           0 :         } else if (nsdata->nsfeat.ns_atomic_write_unit) {
    4144           0 :                 disk->acwu = nsdata->nacwu + 1; /* 0-based */
    4145             :         } else {
    4146           0 :                 disk->acwu = cdata->acwu + 1; /* 0-based */
    4147             :         }
    4148             : 
    4149          37 :         if (cdata->oncs.copy) {
    4150             :                 /* For now bdev interface allows only single segment copy */
    4151           0 :                 disk->max_copy = nsdata->mssrl;
    4152             :         }
    4153             : 
    4154          37 :         disk->ctxt = ctx;
    4155          37 :         disk->fn_table = &nvmelib_fn_table;
    4156          37 :         disk->module = &nvme_if;
    4157             : 
    4158          37 :         return 0;
    4159             : }
    4160             : 
    4161             : static struct nvme_bdev *
    4162          37 : nvme_bdev_alloc(void)
    4163             : {
    4164             :         struct nvme_bdev *bdev;
    4165             :         int rc;
    4166             : 
    4167          37 :         bdev = calloc(1, sizeof(*bdev));
    4168          37 :         if (!bdev) {
    4169           0 :                 SPDK_ERRLOG("bdev calloc() failed\n");
    4170           0 :                 return NULL;
    4171             :         }
    4172             : 
    4173          37 :         if (g_opts.nvme_error_stat) {
    4174           0 :                 bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
    4175           0 :                 if (!bdev->err_stat) {
    4176           0 :                         SPDK_ERRLOG("err_stat calloc() failed\n");
    4177           0 :                         free(bdev);
    4178           0 :                         return NULL;
    4179             :                 }
    4180             :         }
    4181             : 
    4182          37 :         rc = pthread_mutex_init(&bdev->mutex, NULL);
    4183          37 :         if (rc != 0) {
    4184           0 :                 free(bdev->err_stat);
    4185           0 :                 free(bdev);
    4186           0 :                 return NULL;
    4187             :         }
    4188             : 
    4189          37 :         bdev->ref = 1;
    4190          37 :         bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
    4191          37 :         bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
    4192          37 :         bdev->rr_min_io = UINT32_MAX;
    4193          37 :         TAILQ_INIT(&bdev->nvme_ns_list);
    4194             : 
    4195          37 :         return bdev;
    4196             : }
    4197             : 
    4198             : static int
    4199          37 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4200             : {
    4201             :         struct nvme_bdev *bdev;
    4202          37 :         struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
    4203             :         int rc;
    4204             : 
    4205          37 :         bdev = nvme_bdev_alloc();
    4206          37 :         if (bdev == NULL) {
    4207           0 :                 SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
    4208           0 :                 return -ENOMEM;
    4209             :         }
    4210             : 
    4211          37 :         bdev->opal = nvme_ctrlr->opal_dev != NULL;
    4212             : 
    4213          37 :         rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
    4214             :                               nvme_ns->ns, nvme_ctrlr->opts.prchk_flags, bdev);
    4215          37 :         if (rc != 0) {
    4216           0 :                 SPDK_ERRLOG("Failed to create NVMe disk\n");
    4217           0 :                 nvme_bdev_free(bdev);
    4218           0 :                 return rc;
    4219             :         }
    4220             : 
    4221          37 :         spdk_io_device_register(bdev,
    4222             :                                 bdev_nvme_create_bdev_channel_cb,
    4223             :                                 bdev_nvme_destroy_bdev_channel_cb,
    4224             :                                 sizeof(struct nvme_bdev_channel),
    4225          37 :                                 bdev->disk.name);
    4226             : 
    4227          37 :         nvme_ns->bdev = bdev;
    4228          37 :         bdev->nsid = nvme_ns->id;
    4229          37 :         TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
    4230             : 
    4231          37 :         bdev->nbdev_ctrlr = nbdev_ctrlr;
    4232          37 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);
    4233             : 
    4234          37 :         rc = spdk_bdev_register(&bdev->disk);
    4235          37 :         if (rc != 0) {
    4236           1 :                 SPDK_ERRLOG("spdk_bdev_register() failed\n");
    4237           1 :                 spdk_io_device_unregister(bdev, NULL);
    4238           1 :                 nvme_ns->bdev = NULL;
    4239           1 :                 TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
    4240           1 :                 nvme_bdev_free(bdev);
    4241           1 :                 return rc;
    4242             :         }
    4243             : 
    4244          36 :         return 0;
    4245             : }
    4246             : 
    4247             : static bool
    4248          23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
    4249             : {
    4250             :         const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
    4251             :         const struct spdk_uuid *uuid1, *uuid2;
    4252             : 
    4253          23 :         nsdata1 = spdk_nvme_ns_get_data(ns1);
    4254          23 :         nsdata2 = spdk_nvme_ns_get_data(ns2);
    4255          23 :         uuid1 = spdk_nvme_ns_get_uuid(ns1);
    4256          23 :         uuid2 = spdk_nvme_ns_get_uuid(ns2);
    4257             : 
    4258          45 :         return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
    4259          22 :                nsdata1->eui64 == nsdata2->eui64 &&
    4260          21 :                ((uuid1 == NULL && uuid2 == NULL) ||
    4261          59 :                 (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
    4262          18 :                spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
    4263             : }
    4264             : 
    4265             : static bool
    4266           0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    4267             :                  struct spdk_nvme_ctrlr_opts *opts)
    4268             : {
    4269             :         struct nvme_probe_skip_entry *entry;
    4270             : 
    4271           0 :         TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
    4272           0 :                 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    4273           0 :                         return false;
    4274             :                 }
    4275             :         }
    4276             : 
    4277           0 :         opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
    4278           0 :         opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
    4279           0 :         opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
    4280           0 :         opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
    4281           0 :         opts->disable_read_ana_log_page = true;
    4282             : 
    4283           0 :         SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
    4284             : 
    4285           0 :         return true;
    4286             : }
    4287             : 
    4288             : static void
    4289           0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
    4290             : {
    4291           0 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    4292             : 
    4293           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    4294           0 :                 SPDK_WARNLOG("Abort failed. Resetting controller. sc is %u, sct is %u.\n", cpl->status.sc,
    4295             :                              cpl->status.sct);
    4296           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4297           0 :         } else if (cpl->cdw0 & 0x1) {
    4298           0 :                 SPDK_WARNLOG("Specified command could not be aborted.\n");
    4299           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4300             :         }
    4301           0 : }
    4302             : 
    4303             : static void
    4304           0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
    4305             :            struct spdk_nvme_qpair *qpair, uint16_t cid)
    4306             : {
    4307           0 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    4308             :         union spdk_nvme_csts_register csts;
    4309             :         int rc;
    4310             : 
    4311           0 :         assert(nvme_ctrlr->ctrlr == ctrlr);
    4312             : 
    4313           0 :         SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
    4314             : 
    4315             :         /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
    4316             :          * queue.  (Note: qpair == NULL when there's an admin cmd timeout.)  Otherwise we
    4317             :          * would submit another fabrics cmd on the admin queue to read CSTS and check for its
    4318             :          * completion recursively.
    4319             :          */
    4320           0 :         if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
    4321           0 :                 csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
    4322           0 :                 if (csts.bits.cfs) {
    4323           0 :                         SPDK_ERRLOG("Controller Fatal Status, reset required\n");
    4324           0 :                         bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4325           0 :                         return;
    4326             :                 }
    4327             :         }
    4328             : 
    4329           0 :         switch (g_opts.action_on_timeout) {
    4330           0 :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
    4331           0 :                 if (qpair) {
    4332             :                         /* Don't send abort to ctrlr when ctrlr is not available. */
    4333           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4334           0 :                         if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    4335           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4336           0 :                                 SPDK_NOTICELOG("Quit abort. Ctrlr is not available.\n");
    4337           0 :                                 return;
    4338             :                         }
    4339           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4340             : 
    4341           0 :                         rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
    4342             :                                                        nvme_abort_cpl, nvme_ctrlr);
    4343           0 :                         if (rc == 0) {
    4344           0 :                                 return;
    4345             :                         }
    4346             : 
    4347           0 :                         SPDK_ERRLOG("Unable to send abort. Resetting, rc is %d.\n", rc);
    4348             :                 }
    4349             : 
    4350             :         /* FALLTHROUGH */
    4351             :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
    4352           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4353           0 :                 break;
    4354           0 :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
    4355           0 :                 SPDK_DEBUGLOG(bdev_nvme, "No action for nvme controller timeout.\n");
    4356           0 :                 break;
    4357           0 :         default:
    4358           0 :                 SPDK_ERRLOG("An invalid timeout action value is found.\n");
    4359           0 :                 break;
    4360             :         }
    4361             : }
    4362             : 
    4363             : static struct nvme_ns *
    4364          50 : nvme_ns_alloc(void)
    4365             : {
    4366             :         struct nvme_ns *nvme_ns;
    4367             : 
    4368          50 :         nvme_ns = calloc(1, sizeof(struct nvme_ns));
    4369          50 :         if (nvme_ns == NULL) {
    4370           0 :                 return NULL;
    4371             :         }
    4372             : 
    4373          50 :         if (g_opts.io_path_stat) {
    4374           0 :                 nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
    4375           0 :                 if (nvme_ns->stat == NULL) {
    4376           0 :                         free(nvme_ns);
    4377           0 :                         return NULL;
    4378             :                 }
    4379           0 :                 spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
    4380             :         }
    4381             : 
    4382          50 :         return nvme_ns;
    4383             : }
    4384             : 
    4385             : static void
    4386          50 : nvme_ns_free(struct nvme_ns *nvme_ns)
    4387             : {
    4388          50 :         free(nvme_ns->stat);
    4389          50 :         free(nvme_ns);
    4390          50 : }
    4391             : 
    4392             : static void
    4393          50 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
    4394             : {
    4395          50 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4396          50 :         struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
    4397             : 
    4398          50 :         if (rc == 0) {
    4399          48 :                 nvme_ns->probe_ctx = NULL;
    4400          48 :                 pthread_mutex_lock(&nvme_ctrlr->mutex);
    4401          48 :                 nvme_ctrlr->ref++;
    4402          48 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4403             :         } else {
    4404           2 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4405           2 :                 nvme_ns_free(nvme_ns);
    4406             :         }
    4407             : 
    4408          50 :         if (ctx) {
    4409          49 :                 ctx->populates_in_progress--;
    4410          49 :                 if (ctx->populates_in_progress == 0) {
    4411          12 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    4412             :                 }
    4413             :         }
    4414          50 : }
    4415             : 
    4416             : static void
    4417           2 : bdev_nvme_add_io_path(struct spdk_io_channel_iter *i)
    4418             : {
    4419           2 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    4420           2 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    4421           2 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4422             :         int rc;
    4423             : 
    4424           2 :         rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
    4425           2 :         if (rc != 0) {
    4426           0 :                 SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
    4427             :         }
    4428             : 
    4429           2 :         spdk_for_each_channel_continue(i, rc);
    4430           2 : }
    4431             : 
    4432             : static void
    4433           2 : bdev_nvme_delete_io_path(struct spdk_io_channel_iter *i)
    4434             : {
    4435           2 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    4436           2 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    4437           2 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4438             :         struct nvme_io_path *io_path;
    4439             : 
    4440           2 :         io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
    4441           2 :         if (io_path != NULL) {
    4442           2 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
    4443             :         }
    4444             : 
    4445           2 :         spdk_for_each_channel_continue(i, 0);
    4446           2 : }
    4447             : 
    4448             : static void
    4449           0 : bdev_nvme_add_io_path_failed(struct spdk_io_channel_iter *i, int status)
    4450             : {
    4451           0 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4452             : 
    4453           0 :         nvme_ctrlr_populate_namespace_done(nvme_ns, -1);
    4454           0 : }
    4455             : 
    4456             : static void
    4457          12 : bdev_nvme_add_io_path_done(struct spdk_io_channel_iter *i, int status)
    4458             : {
    4459          12 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4460          12 :         struct nvme_bdev *bdev = spdk_io_channel_iter_get_io_device(i);
    4461             : 
    4462          12 :         if (status == 0) {
    4463          12 :                 nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
    4464             :         } else {
    4465             :                 /* Delete the added io_paths and fail populating the namespace. */
    4466           0 :                 spdk_for_each_channel(bdev,
    4467             :                                       bdev_nvme_delete_io_path,
    4468             :                                       nvme_ns,
    4469             :                                       bdev_nvme_add_io_path_failed);
    4470             :         }
    4471          12 : }
    4472             : 
    4473             : static int
    4474          13 : nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
    4475             : {
    4476             :         struct nvme_ns *tmp_ns;
    4477             :         const struct spdk_nvme_ns_data *nsdata;
    4478             : 
    4479          13 :         nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
    4480          13 :         if (!nsdata->nmic.can_share) {
    4481           0 :                 SPDK_ERRLOG("Namespace cannot be shared.\n");
    4482           0 :                 return -EINVAL;
    4483             :         }
    4484             : 
    4485          13 :         pthread_mutex_lock(&bdev->mutex);
    4486             : 
    4487          13 :         tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
    4488          13 :         assert(tmp_ns != NULL);
    4489             : 
    4490          13 :         if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
    4491           1 :                 pthread_mutex_unlock(&bdev->mutex);
    4492           1 :                 SPDK_ERRLOG("Namespaces are not identical.\n");
    4493           1 :                 return -EINVAL;
    4494             :         }
    4495             : 
    4496          12 :         bdev->ref++;
    4497          12 :         TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
    4498          12 :         nvme_ns->bdev = bdev;
    4499             : 
    4500          12 :         pthread_mutex_unlock(&bdev->mutex);
    4501             : 
    4502             :         /* Add nvme_io_path to nvme_bdev_channels dynamically. */
    4503          12 :         spdk_for_each_channel(bdev,
    4504             :                               bdev_nvme_add_io_path,
    4505             :                               nvme_ns,
    4506             :                               bdev_nvme_add_io_path_done);
    4507             : 
    4508          12 :         return 0;
    4509             : }
    4510             : 
    4511             : static void
    4512          50 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4513             : {
    4514             :         struct spdk_nvme_ns     *ns;
    4515             :         struct nvme_bdev        *bdev;
    4516          50 :         int                     rc = 0;
    4517             : 
    4518          50 :         ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
    4519          50 :         if (!ns) {
    4520           0 :                 SPDK_DEBUGLOG(bdev_nvme, "Invalid NS %d\n", nvme_ns->id);
    4521           0 :                 rc = -EINVAL;
    4522           0 :                 goto done;
    4523             :         }
    4524             : 
    4525          50 :         nvme_ns->ns = ns;
    4526          50 :         nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    4527             : 
    4528          50 :         if (nvme_ctrlr->ana_log_page != NULL) {
    4529          37 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
    4530             :         }
    4531             : 
    4532          50 :         bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
    4533          50 :         if (bdev == NULL) {
    4534          37 :                 rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
    4535             :         } else {
    4536          13 :                 rc = nvme_bdev_add_ns(bdev, nvme_ns);
    4537          13 :                 if (rc == 0) {
    4538          12 :                         return;
    4539             :                 }
    4540             :         }
    4541           1 : done:
    4542          38 :         nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
    4543             : }
    4544             : 
    4545             : static void
    4546          48 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
    4547             : {
    4548          48 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4549             : 
    4550          48 :         assert(nvme_ctrlr != NULL);
    4551             : 
    4552          48 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4553             : 
    4554          48 :         RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4555             : 
    4556          48 :         if (nvme_ns->bdev != NULL) {
    4557           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4558           0 :                 return;
    4559             :         }
    4560             : 
    4561          48 :         nvme_ns_free(nvme_ns);
    4562          48 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4563             : 
    4564          48 :         nvme_ctrlr_release(nvme_ctrlr);
    4565             : }
    4566             : 
    4567             : static void
    4568          11 : bdev_nvme_delete_io_path_done(struct spdk_io_channel_iter *i, int status)
    4569             : {
    4570          11 :         struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
    4571             : 
    4572          11 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    4573          11 : }
    4574             : 
    4575             : static void
    4576          48 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4577             : {
    4578             :         struct nvme_bdev *bdev;
    4579             : 
    4580          48 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    4581             : 
    4582          48 :         bdev = nvme_ns->bdev;
    4583          48 :         if (bdev != NULL) {
    4584          44 :                 pthread_mutex_lock(&bdev->mutex);
    4585             : 
    4586          44 :                 assert(bdev->ref > 0);
    4587          44 :                 bdev->ref--;
    4588          44 :                 if (bdev->ref == 0) {
    4589          33 :                         pthread_mutex_unlock(&bdev->mutex);
    4590             : 
    4591          33 :                         spdk_bdev_unregister(&bdev->disk, NULL, NULL);
    4592             :                 } else {
    4593             :                         /* spdk_bdev_unregister() is not called until the last nvme_ns is
    4594             :                          * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
    4595             :                          * and clear nvme_ns->bdev here.
    4596             :                          */
    4597          11 :                         TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
    4598          11 :                         nvme_ns->bdev = NULL;
    4599             : 
    4600          11 :                         pthread_mutex_unlock(&bdev->mutex);
    4601             : 
    4602             :                         /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
    4603             :                          * we call depopulate_namespace_done() to avoid use-after-free.
    4604             :                          */
    4605          11 :                         spdk_for_each_channel(bdev,
    4606             :                                               bdev_nvme_delete_io_path,
    4607             :                                               nvme_ns,
    4608             :                                               bdev_nvme_delete_io_path_done);
    4609          11 :                         return;
    4610             :                 }
    4611             :         }
    4612             : 
    4613          37 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    4614             : }
    4615             : 
    4616             : static void
    4617          61 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
    4618             :                                struct nvme_async_probe_ctx *ctx)
    4619             : {
    4620          61 :         struct spdk_nvme_ctrlr  *ctrlr = nvme_ctrlr->ctrlr;
    4621             :         struct nvme_ns  *nvme_ns, *next;
    4622             :         struct spdk_nvme_ns     *ns;
    4623             :         struct nvme_bdev        *bdev;
    4624             :         uint32_t                nsid;
    4625             :         int                     rc;
    4626             :         uint64_t                num_sectors;
    4627             : 
    4628          61 :         if (ctx) {
    4629             :                 /* Initialize this count to 1 to handle the populate functions
    4630             :                  * calling nvme_ctrlr_populate_namespace_done() immediately.
    4631             :                  */
    4632          45 :                 ctx->populates_in_progress = 1;
    4633             :         }
    4634             : 
    4635             :         /* First loop over our existing namespaces and see if they have been
    4636             :          * removed. */
    4637          61 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    4638          65 :         while (nvme_ns != NULL) {
    4639           4 :                 next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    4640             : 
    4641           4 :                 if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    4642             :                         /* NS is still there or added again. Its attributes may have changed. */
    4643           3 :                         ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
    4644           3 :                         if (nvme_ns->ns != ns) {
    4645           1 :                                 assert(nvme_ns->ns == NULL);
    4646           1 :                                 nvme_ns->ns = ns;
    4647           1 :                                 SPDK_DEBUGLOG(bdev_nvme, "NSID %u was added\n", nvme_ns->id);
    4648             :                         }
    4649             : 
    4650           3 :                         num_sectors = spdk_nvme_ns_get_num_sectors(ns);
    4651           3 :                         bdev = nvme_ns->bdev;
    4652           3 :                         assert(bdev != NULL);
    4653           3 :                         if (bdev->disk.blockcnt != num_sectors) {
    4654           1 :                                 SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
    4655             :                                                nvme_ns->id,
    4656             :                                                bdev->disk.name,
    4657             :                                                bdev->disk.blockcnt,
    4658             :                                                num_sectors);
    4659           1 :                                 rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
    4660           1 :                                 if (rc != 0) {
    4661           0 :                                         SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
    4662             :                                                     bdev->disk.name, rc);
    4663             :                                 }
    4664             :                         }
    4665             :                 } else {
    4666             :                         /* Namespace was removed */
    4667           1 :                         nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    4668             :                 }
    4669             : 
    4670           4 :                 nvme_ns = next;
    4671             :         }
    4672             : 
    4673             :         /* Loop through all of the namespaces at the nvme level and see if any of them are new */
    4674          61 :         nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    4675         114 :         while (nsid != 0) {
    4676          53 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    4677             : 
    4678          53 :                 if (nvme_ns == NULL) {
    4679             :                         /* Found a new one */
    4680          50 :                         nvme_ns = nvme_ns_alloc();
    4681          50 :                         if (nvme_ns == NULL) {
    4682           0 :                                 SPDK_ERRLOG("Failed to allocate namespace\n");
    4683             :                                 /* This just fails to attach the namespace. It may work on a future attempt. */
    4684           0 :                                 continue;
    4685             :                         }
    4686             : 
    4687          50 :                         nvme_ns->id = nsid;
    4688          50 :                         nvme_ns->ctrlr = nvme_ctrlr;
    4689             : 
    4690          50 :                         nvme_ns->bdev = NULL;
    4691             : 
    4692          50 :                         if (ctx) {
    4693          49 :                                 ctx->populates_in_progress++;
    4694             :                         }
    4695          50 :                         nvme_ns->probe_ctx = ctx;
    4696             : 
    4697          50 :                         RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4698             : 
    4699          50 :                         nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
    4700             :                 }
    4701             : 
    4702          53 :                 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
    4703             :         }
    4704             : 
    4705          61 :         if (ctx) {
    4706             :                 /* Decrement this count now that the loop is over to account
    4707             :                  * for the one we started with.  If the count is then 0, we
    4708             :                  * know any populate_namespace functions completed immediately,
    4709             :                  * so we'll kick the callback here.
    4710             :                  */
    4711          45 :                 ctx->populates_in_progress--;
    4712          45 :                 if (ctx->populates_in_progress == 0) {
    4713          33 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    4714             :                 }
    4715             :         }
    4716             : 
    4717          61 : }
    4718             : 
    4719             : static void
    4720          59 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    4721             : {
    4722             :         struct nvme_ns *nvme_ns, *tmp;
    4723             : 
    4724         106 :         RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
    4725          47 :                 nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    4726             :         }
    4727          59 : }
    4728             : 
    4729             : static uint32_t
    4730          36 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
    4731             : {
    4732          36 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    4733             :         const struct spdk_nvme_ctrlr_data *cdata;
    4734          36 :         uint32_t nsid, ns_count = 0;
    4735             : 
    4736          36 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    4737             : 
    4738          80 :         for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    4739          44 :              nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
    4740          44 :                 ns_count++;
    4741             :         }
    4742             : 
    4743          36 :         return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    4744          36 :                sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
    4745             :                sizeof(uint32_t);
    4746             : }
    4747             : 
    4748             : static int
    4749           6 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
    4750             :                           void *cb_arg)
    4751             : {
    4752           6 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    4753             :         struct nvme_ns *nvme_ns;
    4754             :         uint32_t i, nsid;
    4755             : 
    4756          11 :         for (i = 0; i < desc->num_of_nsid; i++) {
    4757           5 :                 nsid = desc->nsid[i];
    4758           5 :                 if (nsid == 0) {
    4759           0 :                         continue;
    4760             :                 }
    4761             : 
    4762           5 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    4763             : 
    4764           5 :                 assert(nvme_ns != NULL);
    4765           5 :                 if (nvme_ns == NULL) {
    4766             :                         /* Target told us that an inactive namespace had an ANA change */
    4767           0 :                         continue;
    4768             :                 }
    4769             : 
    4770           5 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    4771             :         }
    4772             : 
    4773           6 :         return 0;
    4774             : }
    4775             : 
    4776             : static void
    4777           0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    4778             : {
    4779             :         struct nvme_ns *nvme_ns;
    4780             : 
    4781           0 :         spdk_free(nvme_ctrlr->ana_log_page);
    4782           0 :         nvme_ctrlr->ana_log_page = NULL;
    4783             : 
    4784           0 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    4785             :              nvme_ns != NULL;
    4786           0 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    4787           0 :                 nvme_ns->ana_state_updating = false;
    4788           0 :                 nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    4789             :         }
    4790           0 : }
    4791             : 
    4792             : static void
    4793           3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
    4794             : {
    4795           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    4796             : 
    4797           3 :         if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
    4798           3 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
    4799             :                                              nvme_ctrlr);
    4800             :         } else {
    4801           0 :                 bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
    4802             :         }
    4803             : 
    4804           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4805             : 
    4806           3 :         assert(nvme_ctrlr->ana_log_page_updating == true);
    4807           3 :         nvme_ctrlr->ana_log_page_updating = false;
    4808             : 
    4809           3 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    4810           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4811             : 
    4812           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    4813             :         } else {
    4814           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4815             : 
    4816           3 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    4817             :         }
    4818           3 : }
    4819             : 
    4820             : static int
    4821           6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    4822             : {
    4823             :         uint32_t ana_log_page_size;
    4824             :         int rc;
    4825             : 
    4826           6 :         if (nvme_ctrlr->ana_log_page == NULL) {
    4827           0 :                 return -EINVAL;
    4828             :         }
    4829             : 
    4830           6 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    4831             : 
    4832           6 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    4833           0 :                 SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    4834             :                             ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    4835           0 :                 return -EINVAL;
    4836             :         }
    4837             : 
    4838           6 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4839           6 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    4840             :             nvme_ctrlr->ana_log_page_updating) {
    4841           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4842           3 :                 return -EBUSY;
    4843             :         }
    4844             : 
    4845           3 :         nvme_ctrlr->ana_log_page_updating = true;
    4846           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4847             : 
    4848           3 :         rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
    4849             :                                               SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    4850             :                                               SPDK_NVME_GLOBAL_NS_TAG,
    4851           3 :                                               nvme_ctrlr->ana_log_page,
    4852             :                                               ana_log_page_size, 0,
    4853             :                                               nvme_ctrlr_read_ana_log_page_done,
    4854             :                                               nvme_ctrlr);
    4855           3 :         if (rc != 0) {
    4856           0 :                 nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
    4857             :         }
    4858             : 
    4859           3 :         return rc;
    4860             : }
    4861             : 
    4862             : static void
    4863           0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
    4864             : {
    4865           0 : }
    4866             : 
    4867             : struct bdev_nvme_set_preferred_path_ctx {
    4868             :         struct spdk_bdev_desc *desc;
    4869             :         struct nvme_ns *nvme_ns;
    4870             :         bdev_nvme_set_preferred_path_cb cb_fn;
    4871             :         void *cb_arg;
    4872             : };
    4873             : 
    4874             : static void
    4875           3 : bdev_nvme_set_preferred_path_done(struct spdk_io_channel_iter *i, int status)
    4876             : {
    4877           3 :         struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
    4878             : 
    4879           3 :         assert(ctx != NULL);
    4880           3 :         assert(ctx->desc != NULL);
    4881           3 :         assert(ctx->cb_fn != NULL);
    4882             : 
    4883           3 :         spdk_bdev_close(ctx->desc);
    4884             : 
    4885           3 :         ctx->cb_fn(ctx->cb_arg, status);
    4886             : 
    4887           3 :         free(ctx);
    4888           3 : }
    4889             : 
    4890             : static void
    4891           2 : _bdev_nvme_set_preferred_path(struct spdk_io_channel_iter *i)
    4892             : {
    4893           2 :         struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
    4894           2 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    4895           2 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    4896             :         struct nvme_io_path *io_path, *prev;
    4897             : 
    4898           2 :         prev = NULL;
    4899           3 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    4900           3 :                 if (io_path->nvme_ns == ctx->nvme_ns) {
    4901           2 :                         break;
    4902             :                 }
    4903           1 :                 prev = io_path;
    4904             :         }
    4905             : 
    4906           2 :         if (io_path != NULL) {
    4907           2 :                 if (prev != NULL) {
    4908           1 :                         STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
    4909           1 :                         STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
    4910             :                 }
    4911             : 
    4912             :                 /* We can set io_path to nbdev_ch->current_io_path directly here.
    4913             :                  * However, it needs to be conditional. To simplify the code,
    4914             :                  * just clear nbdev_ch->current_io_path and let find_io_path()
    4915             :                  * fill it.
    4916             :                  *
    4917             :                  * Automatic failback may be disabled. Hence even if the io_path is
    4918             :                  * already at the head, clear nbdev_ch->current_io_path.
    4919             :                  */
    4920           2 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    4921             :         }
    4922             : 
    4923           2 :         spdk_for_each_channel_continue(i, 0);
    4924           2 : }
    4925             : 
    4926             : static struct nvme_ns *
    4927           3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
    4928             : {
    4929             :         struct nvme_ns *nvme_ns, *prev;
    4930             :         const struct spdk_nvme_ctrlr_data *cdata;
    4931             : 
    4932           3 :         prev = NULL;
    4933           6 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    4934           6 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    4935             : 
    4936           6 :                 if (cdata->cntlid == cntlid) {
    4937           3 :                         break;
    4938             :                 }
    4939           3 :                 prev = nvme_ns;
    4940             :         }
    4941             : 
    4942           3 :         if (nvme_ns != NULL && prev != NULL) {
    4943           2 :                 TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
    4944           2 :                 TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
    4945             :         }
    4946             : 
    4947           3 :         return nvme_ns;
    4948             : }
    4949             : 
    4950             : /* This function supports only multipath mode. There is only a single I/O path
    4951             :  * for each NVMe-oF controller. Hence, just move the matched I/O path to the
    4952             :  * head of the I/O path list for each NVMe bdev channel.
    4953             :  *
    4954             :  * NVMe bdev channel may be acquired after completing this function. move the
    4955             :  * matched namespace to the head of the namespace list for the NVMe bdev too.
    4956             :  */
    4957             : void
    4958           3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
    4959             :                              bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
    4960             : {
    4961             :         struct bdev_nvme_set_preferred_path_ctx *ctx;
    4962             :         struct spdk_bdev *bdev;
    4963             :         struct nvme_bdev *nbdev;
    4964           3 :         int rc = 0;
    4965             : 
    4966           3 :         assert(cb_fn != NULL);
    4967             : 
    4968           3 :         ctx = calloc(1, sizeof(*ctx));
    4969           3 :         if (ctx == NULL) {
    4970           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    4971           0 :                 rc = -ENOMEM;
    4972           0 :                 goto err_alloc;
    4973             :         }
    4974             : 
    4975           3 :         ctx->cb_fn = cb_fn;
    4976           3 :         ctx->cb_arg = cb_arg;
    4977             : 
    4978           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    4979           3 :         if (rc != 0) {
    4980           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    4981           0 :                 goto err_open;
    4982             :         }
    4983             : 
    4984           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    4985             : 
    4986           3 :         if (bdev->module != &nvme_if) {
    4987           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    4988           0 :                 rc = -ENODEV;
    4989           0 :                 goto err_bdev;
    4990             :         }
    4991             : 
    4992           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    4993             : 
    4994           3 :         pthread_mutex_lock(&nbdev->mutex);
    4995             : 
    4996           3 :         ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
    4997           3 :         if (ctx->nvme_ns == NULL) {
    4998           0 :                 pthread_mutex_unlock(&nbdev->mutex);
    4999             : 
    5000           0 :                 SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
    5001           0 :                 rc = -ENODEV;
    5002           0 :                 goto err_bdev;
    5003             :         }
    5004             : 
    5005           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5006             : 
    5007           3 :         spdk_for_each_channel(nbdev,
    5008             :                               _bdev_nvme_set_preferred_path,
    5009             :                               ctx,
    5010             :                               bdev_nvme_set_preferred_path_done);
    5011           3 :         return;
    5012             : 
    5013           0 : err_bdev:
    5014           0 :         spdk_bdev_close(ctx->desc);
    5015           0 : err_open:
    5016           0 :         free(ctx);
    5017           0 : err_alloc:
    5018           0 :         cb_fn(cb_arg, rc);
    5019             : }
    5020             : 
    5021             : struct bdev_nvme_set_multipath_policy_ctx {
    5022             :         struct spdk_bdev_desc *desc;
    5023             :         bdev_nvme_set_multipath_policy_cb cb_fn;
    5024             :         void *cb_arg;
    5025             : };
    5026             : 
    5027             : static void
    5028           3 : bdev_nvme_set_multipath_policy_done(struct spdk_io_channel_iter *i, int status)
    5029             : {
    5030           3 :         struct bdev_nvme_set_multipath_policy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
    5031             : 
    5032           3 :         assert(ctx != NULL);
    5033           3 :         assert(ctx->desc != NULL);
    5034           3 :         assert(ctx->cb_fn != NULL);
    5035             : 
    5036           3 :         spdk_bdev_close(ctx->desc);
    5037             : 
    5038           3 :         ctx->cb_fn(ctx->cb_arg, status);
    5039             : 
    5040           3 :         free(ctx);
    5041           3 : }
    5042             : 
    5043             : static void
    5044           1 : _bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i)
    5045             : {
    5046           1 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
    5047           1 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
    5048           1 :         struct nvme_bdev *nbdev = spdk_io_channel_get_io_device(_ch);
    5049             : 
    5050           1 :         nbdev_ch->mp_policy = nbdev->mp_policy;
    5051           1 :         nbdev_ch->mp_selector = nbdev->mp_selector;
    5052           1 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
    5053           1 :         bdev_nvme_clear_current_io_path(nbdev_ch);
    5054             : 
    5055           1 :         spdk_for_each_channel_continue(i, 0);
    5056           1 : }
    5057             : 
    5058             : void
    5059           3 : bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy policy,
    5060             :                                enum bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
    5061             :                                bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
    5062             : {
    5063             :         struct bdev_nvme_set_multipath_policy_ctx *ctx;
    5064             :         struct spdk_bdev *bdev;
    5065             :         struct nvme_bdev *nbdev;
    5066             :         int rc;
    5067             : 
    5068           3 :         assert(cb_fn != NULL);
    5069             : 
    5070           3 :         if (policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE && selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    5071           1 :                 if (rr_min_io == UINT32_MAX) {
    5072           0 :                         rr_min_io = 1;
    5073           1 :                 } else if (rr_min_io == 0) {
    5074           0 :                         rc = -EINVAL;
    5075           0 :                         goto exit;
    5076             :                 }
    5077           2 :         } else if (rr_min_io != UINT32_MAX) {
    5078           0 :                 rc = -EINVAL;
    5079           0 :                 goto exit;
    5080             :         }
    5081             : 
    5082           3 :         ctx = calloc(1, sizeof(*ctx));
    5083           3 :         if (ctx == NULL) {
    5084           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    5085           0 :                 rc = -ENOMEM;
    5086           0 :                 goto exit;
    5087             :         }
    5088             : 
    5089           3 :         ctx->cb_fn = cb_fn;
    5090           3 :         ctx->cb_arg = cb_arg;
    5091             : 
    5092           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    5093           3 :         if (rc != 0) {
    5094           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    5095           0 :                 rc = -ENODEV;
    5096           0 :                 goto err_open;
    5097             :         }
    5098             : 
    5099           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    5100           3 :         if (bdev->module != &nvme_if) {
    5101           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    5102           0 :                 rc = -ENODEV;
    5103           0 :                 goto err_module;
    5104             :         }
    5105           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    5106             : 
    5107           3 :         pthread_mutex_lock(&nbdev->mutex);
    5108           3 :         nbdev->mp_policy = policy;
    5109           3 :         nbdev->mp_selector = selector;
    5110           3 :         nbdev->rr_min_io = rr_min_io;
    5111           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5112             : 
    5113           3 :         spdk_for_each_channel(nbdev,
    5114             :                               _bdev_nvme_set_multipath_policy,
    5115             :                               ctx,
    5116             :                               bdev_nvme_set_multipath_policy_done);
    5117           3 :         return;
    5118             : 
    5119           0 : err_module:
    5120           0 :         spdk_bdev_close(ctx->desc);
    5121           0 : err_open:
    5122           0 :         free(ctx);
    5123           0 : exit:
    5124           0 :         cb_fn(cb_arg, rc);
    5125             : }
    5126             : 
    5127             : static void
    5128           3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    5129             : {
    5130           3 :         struct nvme_ctrlr *nvme_ctrlr           = arg;
    5131             :         union spdk_nvme_async_event_completion  event;
    5132             : 
    5133           3 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5134           0 :                 SPDK_WARNLOG("AER request execute failed\n");
    5135           0 :                 return;
    5136             :         }
    5137             : 
    5138           3 :         event.raw = cpl->cdw0;
    5139           3 :         if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5140           3 :             (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
    5141           2 :                 nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
    5142           1 :         } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5143           1 :                    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
    5144           1 :                 nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
    5145             :         }
    5146             : }
    5147             : 
    5148             : static void
    5149          51 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
    5150             : {
    5151          51 :         if (ctx->cb_fn) {
    5152          51 :                 ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
    5153             :         }
    5154             : 
    5155          51 :         ctx->namespaces_populated = true;
    5156          51 :         if (ctx->probe_done) {
    5157             :                 /* The probe was already completed, so we need to free the context
    5158             :                  * here.  This can happen for cases like OCSSD, where we need to
    5159             :                  * send additional commands to the SSD after attach.
    5160             :                  */
    5161          31 :                 free(ctx);
    5162             :         }
    5163          51 : }
    5164             : 
    5165             : static void
    5166          59 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
    5167             :                        struct nvme_async_probe_ctx *ctx)
    5168             : {
    5169          59 :         spdk_io_device_register(nvme_ctrlr,
    5170             :                                 bdev_nvme_create_ctrlr_channel_cb,
    5171             :                                 bdev_nvme_destroy_ctrlr_channel_cb,
    5172             :                                 sizeof(struct nvme_ctrlr_channel),
    5173          59 :                                 nvme_ctrlr->nbdev_ctrlr->name);
    5174             : 
    5175          59 :         nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
    5176          59 : }
    5177             : 
    5178             : static void
    5179          30 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
    5180             : {
    5181          30 :         struct nvme_ctrlr *nvme_ctrlr = _ctx;
    5182          30 :         struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
    5183             : 
    5184          30 :         nvme_ctrlr->probe_ctx = NULL;
    5185             : 
    5186          30 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5187           0 :                 nvme_ctrlr_delete(nvme_ctrlr);
    5188             : 
    5189           0 :                 if (ctx != NULL) {
    5190           0 :                         ctx->reported_bdevs = 0;
    5191           0 :                         populate_namespaces_cb(ctx, -1);
    5192             :                 }
    5193           0 :                 return;
    5194             :         }
    5195             : 
    5196          30 :         nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    5197             : }
    5198             : 
    5199             : static int
    5200          30 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    5201             :                              struct nvme_async_probe_ctx *ctx)
    5202             : {
    5203          30 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5204             :         const struct spdk_nvme_ctrlr_data *cdata;
    5205             :         uint32_t ana_log_page_size;
    5206             : 
    5207          30 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5208             : 
    5209             :         /* Set buffer size enough to include maximum number of allowed namespaces. */
    5210          30 :         ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    5211          30 :                             sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
    5212             :                             sizeof(uint32_t);
    5213             : 
    5214          30 :         nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
    5215             :                                                 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5216          30 :         if (nvme_ctrlr->ana_log_page == NULL) {
    5217           0 :                 SPDK_ERRLOG("could not allocate ANA log page buffer\n");
    5218           0 :                 return -ENXIO;
    5219             :         }
    5220             : 
    5221             :         /* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned.
    5222             :          * Hence copy each descriptor to a temporary area when parsing it.
    5223             :          *
    5224             :          * Allocate a buffer whose size is as large as ANA log page buffer because
    5225             :          * we do not know the size of a descriptor until actually reading it.
    5226             :          */
    5227          30 :         nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
    5228          30 :         if (nvme_ctrlr->copied_ana_desc == NULL) {
    5229           0 :                 SPDK_ERRLOG("could not allocate a buffer to parse ANA descriptor\n");
    5230           0 :                 return -ENOMEM;
    5231             :         }
    5232             : 
    5233          30 :         nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
    5234             : 
    5235          30 :         nvme_ctrlr->probe_ctx = ctx;
    5236             : 
    5237             :         /* Then, set the read size only to include the current active namespaces. */
    5238          30 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    5239             : 
    5240          30 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    5241           0 :                 SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    5242             :                             ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    5243           0 :                 return -EINVAL;
    5244             :         }
    5245             : 
    5246          30 :         return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
    5247             :                                                 SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    5248             :                                                 SPDK_NVME_GLOBAL_NS_TAG,
    5249          30 :                                                 nvme_ctrlr->ana_log_page,
    5250             :                                                 ana_log_page_size, 0,
    5251             :                                                 nvme_ctrlr_init_ana_log_page_done,
    5252             :                                                 nvme_ctrlr);
    5253             : }
    5254             : 
    5255             : /* hostnqn and subnqn were already verified before attaching a controller.
    5256             :  * Hence check only the multipath capability and cntlid here.
    5257             :  */
    5258             : static bool
    5259          16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
    5260             : {
    5261             :         struct nvme_ctrlr *tmp;
    5262             :         const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
    5263             : 
    5264          16 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5265             : 
    5266          16 :         if (!cdata->cmic.multi_ctrlr) {
    5267           0 :                 SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5268           0 :                 return false;
    5269             :         }
    5270             : 
    5271          33 :         TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
    5272          18 :                 tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
    5273             : 
    5274          18 :                 if (!tmp_cdata->cmic.multi_ctrlr) {
    5275           0 :                         SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5276           0 :                         return false;
    5277             :                 }
    5278          18 :                 if (cdata->cntlid == tmp_cdata->cntlid) {
    5279           1 :                         SPDK_ERRLOG("cntlid %u are duplicated.\n", tmp_cdata->cntlid);
    5280           1 :                         return false;
    5281             :                 }
    5282             :         }
    5283             : 
    5284          15 :         return true;
    5285             : }
    5286             : 
    5287             : static int
    5288          60 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
    5289             : {
    5290             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    5291          60 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5292          60 :         int rc = 0;
    5293             : 
    5294          60 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    5295             : 
    5296          60 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    5297          60 :         if (nbdev_ctrlr != NULL) {
    5298          16 :                 if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
    5299           1 :                         rc = -EINVAL;
    5300           1 :                         goto exit;
    5301             :                 }
    5302             :         } else {
    5303          44 :                 nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
    5304          44 :                 if (nbdev_ctrlr == NULL) {
    5305           0 :                         SPDK_ERRLOG("Failed to allocate nvme_bdev_ctrlr.\n");
    5306           0 :                         rc = -ENOMEM;
    5307           0 :                         goto exit;
    5308             :                 }
    5309          44 :                 nbdev_ctrlr->name = strdup(name);
    5310          44 :                 if (nbdev_ctrlr->name == NULL) {
    5311           0 :                         SPDK_ERRLOG("Failed to allocate name of nvme_bdev_ctrlr.\n");
    5312           0 :                         free(nbdev_ctrlr);
    5313           0 :                         goto exit;
    5314             :                 }
    5315          44 :                 TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
    5316          44 :                 TAILQ_INIT(&nbdev_ctrlr->bdevs);
    5317          44 :                 TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
    5318             :         }
    5319          59 :         nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
    5320          59 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
    5321          60 : exit:
    5322          60 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    5323          60 :         return rc;
    5324             : }
    5325             : 
    5326             : static int
    5327          60 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
    5328             :                   const char *name,
    5329             :                   const struct spdk_nvme_transport_id *trid,
    5330             :                   struct nvme_async_probe_ctx *ctx)
    5331             : {
    5332             :         struct nvme_ctrlr *nvme_ctrlr;
    5333             :         struct nvme_path_id *path_id;
    5334             :         const struct spdk_nvme_ctrlr_data *cdata;
    5335             :         int rc;
    5336             : 
    5337          60 :         nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
    5338          60 :         if (nvme_ctrlr == NULL) {
    5339           0 :                 SPDK_ERRLOG("Failed to allocate device struct\n");
    5340           0 :                 return -ENOMEM;
    5341             :         }
    5342             : 
    5343          60 :         rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
    5344          60 :         if (rc != 0) {
    5345           0 :                 free(nvme_ctrlr);
    5346           0 :                 return rc;
    5347             :         }
    5348             : 
    5349          60 :         TAILQ_INIT(&nvme_ctrlr->trids);
    5350             : 
    5351          60 :         RB_INIT(&nvme_ctrlr->namespaces);
    5352             : 
    5353          60 :         path_id = calloc(1, sizeof(*path_id));
    5354          60 :         if (path_id == NULL) {
    5355           0 :                 SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
    5356           0 :                 rc = -ENOMEM;
    5357           0 :                 goto err;
    5358             :         }
    5359             : 
    5360          60 :         path_id->trid = *trid;
    5361          60 :         if (ctx != NULL) {
    5362          46 :                 memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
    5363          46 :                 memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
    5364             :         }
    5365          60 :         nvme_ctrlr->active_path_id = path_id;
    5366          60 :         TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
    5367             : 
    5368          60 :         nvme_ctrlr->thread = spdk_get_thread();
    5369          60 :         nvme_ctrlr->ctrlr = ctrlr;
    5370          60 :         nvme_ctrlr->ref = 1;
    5371             : 
    5372          60 :         if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
    5373           0 :                 SPDK_ERRLOG("OCSSDs are not supported");
    5374           0 :                 rc = -ENOTSUP;
    5375           0 :                 goto err;
    5376             :         }
    5377             : 
    5378          60 :         if (ctx != NULL) {
    5379          46 :                 memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
    5380             :         } else {
    5381          14 :                 bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
    5382             :         }
    5383             : 
    5384          60 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
    5385             :                                           g_opts.nvme_adminq_poll_period_us);
    5386             : 
    5387          60 :         if (g_opts.timeout_us > 0) {
    5388             :                 /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
    5389             :                 /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
    5390           0 :                 uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
    5391           0 :                                           g_opts.timeout_us : g_opts.timeout_admin_us;
    5392           0 :                 spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
    5393             :                                 adm_timeout_us, timeout_cb, nvme_ctrlr);
    5394             :         }
    5395             : 
    5396          60 :         spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
    5397          60 :         spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
    5398             : 
    5399          60 :         if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    5400             :             SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
    5401           0 :                 nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
    5402             :         }
    5403             : 
    5404          60 :         rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
    5405          60 :         if (rc != 0) {
    5406           1 :                 goto err;
    5407             :         }
    5408             : 
    5409          59 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5410             : 
    5411          59 :         if (cdata->cmic.ana_reporting) {
    5412          30 :                 rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
    5413          30 :                 if (rc == 0) {
    5414          30 :                         return 0;
    5415             :                 }
    5416             :         } else {
    5417          29 :                 nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    5418          29 :                 return 0;
    5419             :         }
    5420             : 
    5421           1 : err:
    5422           1 :         nvme_ctrlr_delete(nvme_ctrlr);
    5423           1 :         return rc;
    5424             : }
    5425             : 
    5426             : void
    5427          56 : bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts)
    5428             : {
    5429          56 :         opts->prchk_flags = 0;
    5430          56 :         opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
    5431          56 :         opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
    5432          56 :         opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
    5433          56 : }
    5434             : 
    5435             : static void
    5436           0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    5437             :           struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
    5438             : {
    5439             :         char *name;
    5440             : 
    5441           0 :         name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
    5442           0 :         if (!name) {
    5443           0 :                 SPDK_ERRLOG("Failed to assign name to NVMe device\n");
    5444           0 :                 return;
    5445             :         }
    5446             : 
    5447           0 :         if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
    5448           0 :                 SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
    5449             :         } else {
    5450           0 :                 SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
    5451             :         }
    5452             : 
    5453           0 :         free(name);
    5454             : }
    5455             : 
    5456             : static void
    5457          59 : _nvme_ctrlr_destruct(void *ctx)
    5458             : {
    5459          59 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    5460             : 
    5461          59 :         nvme_ctrlr_depopulate_namespaces(nvme_ctrlr);
    5462          59 :         nvme_ctrlr_release(nvme_ctrlr);
    5463          59 : }
    5464             : 
    5465             : static int
    5466          56 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    5467             : {
    5468             :         struct nvme_probe_skip_entry *entry;
    5469             : 
    5470             :         /* The controller's destruction was already started */
    5471          56 :         if (nvme_ctrlr->destruct) {
    5472           0 :                 return -EALREADY;
    5473             :         }
    5474             : 
    5475          56 :         if (!hotplug &&
    5476          56 :             nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
    5477           0 :                 entry = calloc(1, sizeof(*entry));
    5478           0 :                 if (!entry) {
    5479           0 :                         return -ENOMEM;
    5480             :                 }
    5481           0 :                 entry->trid = nvme_ctrlr->active_path_id->trid;
    5482           0 :                 TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
    5483             :         }
    5484             : 
    5485          56 :         nvme_ctrlr->destruct = true;
    5486          56 :         return 0;
    5487             : }
    5488             : 
    5489             : static int
    5490           2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    5491             : {
    5492             :         int rc;
    5493             : 
    5494           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5495           2 :         rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
    5496           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5497             : 
    5498           2 :         if (rc == 0) {
    5499           2 :                 _nvme_ctrlr_destruct(nvme_ctrlr);
    5500           0 :         } else if (rc == -EALREADY) {
    5501           0 :                 rc = 0;
    5502             :         }
    5503             : 
    5504           2 :         return rc;
    5505             : }
    5506             : 
    5507             : static void
    5508           0 : remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
    5509             : {
    5510           0 :         struct nvme_ctrlr *nvme_ctrlr = cb_ctx;
    5511             : 
    5512           0 :         bdev_nvme_delete_ctrlr(nvme_ctrlr, true);
    5513           0 : }
    5514             : 
    5515             : static int
    5516           0 : bdev_nvme_hotplug_probe(void *arg)
    5517             : {
    5518           0 :         if (g_hotplug_probe_ctx == NULL) {
    5519           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    5520           0 :                 return SPDK_POLLER_IDLE;
    5521             :         }
    5522             : 
    5523           0 :         if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
    5524           0 :                 g_hotplug_probe_ctx = NULL;
    5525           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    5526             :         }
    5527             : 
    5528           0 :         return SPDK_POLLER_BUSY;
    5529             : }
    5530             : 
    5531             : static int
    5532           0 : bdev_nvme_hotplug(void *arg)
    5533             : {
    5534           0 :         struct spdk_nvme_transport_id trid_pcie;
    5535             : 
    5536           0 :         if (g_hotplug_probe_ctx) {
    5537           0 :                 return SPDK_POLLER_BUSY;
    5538             :         }
    5539             : 
    5540           0 :         memset(&trid_pcie, 0, sizeof(trid_pcie));
    5541           0 :         spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
    5542             : 
    5543           0 :         g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
    5544             :                               hotplug_probe_cb, attach_cb, NULL);
    5545             : 
    5546           0 :         if (g_hotplug_probe_ctx) {
    5547           0 :                 assert(g_hotplug_probe_poller == NULL);
    5548           0 :                 g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
    5549             :         }
    5550             : 
    5551           0 :         return SPDK_POLLER_BUSY;
    5552             : }
    5553             : 
    5554             : void
    5555           0 : bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
    5556             : {
    5557           0 :         *opts = g_opts;
    5558           0 : }
    5559             : 
    5560             : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
    5561             :                 uint32_t reconnect_delay_sec,
    5562             :                 uint32_t fast_io_fail_timeout_sec);
    5563             : 
    5564             : static int
    5565           0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
    5566             : {
    5567           0 :         if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
    5568             :                 /* Can't set timeout_admin_us without also setting timeout_us */
    5569           0 :                 SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
    5570           0 :                 return -EINVAL;
    5571             :         }
    5572             : 
    5573           0 :         if (opts->bdev_retry_count < -1) {
    5574           0 :                 SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
    5575           0 :                 return -EINVAL;
    5576             :         }
    5577             : 
    5578           0 :         if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
    5579             :                         opts->reconnect_delay_sec,
    5580             :                         opts->fast_io_fail_timeout_sec)) {
    5581           0 :                 return -EINVAL;
    5582             :         }
    5583             : 
    5584           0 :         return 0;
    5585             : }
    5586             : 
    5587             : int
    5588           0 : bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
    5589             : {
    5590             :         int ret;
    5591             : 
    5592           0 :         ret = bdev_nvme_validate_opts(opts);
    5593           0 :         if (ret) {
    5594           0 :                 SPDK_WARNLOG("Failed to set nvme opts.\n");
    5595           0 :                 return ret;
    5596             :         }
    5597             : 
    5598           0 :         if (g_bdev_nvme_init_thread != NULL) {
    5599           0 :                 if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    5600           0 :                         return -EPERM;
    5601             :                 }
    5602             :         }
    5603             : 
    5604           0 :         if (opts->rdma_srq_size != 0 ||
    5605           0 :             opts->rdma_max_cq_size != 0) {
    5606           0 :                 struct spdk_nvme_transport_opts drv_opts;
    5607             : 
    5608           0 :                 spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
    5609           0 :                 if (opts->rdma_srq_size != 0) {
    5610           0 :                         drv_opts.rdma_srq_size = opts->rdma_srq_size;
    5611             :                 }
    5612           0 :                 if (opts->rdma_max_cq_size != 0) {
    5613           0 :                         drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
    5614             :                 }
    5615             : 
    5616           0 :                 ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
    5617           0 :                 if (ret) {
    5618           0 :                         SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
    5619           0 :                         return ret;
    5620             :                 }
    5621             :         }
    5622             : 
    5623           0 :         g_opts = *opts;
    5624             : 
    5625           0 :         return 0;
    5626             : }
    5627             : 
    5628             : struct set_nvme_hotplug_ctx {
    5629             :         uint64_t period_us;
    5630             :         bool enabled;
    5631             :         spdk_msg_fn fn;
    5632             :         void *fn_ctx;
    5633             : };
    5634             : 
    5635             : static void
    5636           0 : set_nvme_hotplug_period_cb(void *_ctx)
    5637             : {
    5638           0 :         struct set_nvme_hotplug_ctx *ctx = _ctx;
    5639             : 
    5640           0 :         spdk_poller_unregister(&g_hotplug_poller);
    5641           0 :         if (ctx->enabled) {
    5642           0 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
    5643             :         }
    5644             : 
    5645           0 :         g_nvme_hotplug_poll_period_us = ctx->period_us;
    5646           0 :         g_nvme_hotplug_enabled = ctx->enabled;
    5647           0 :         if (ctx->fn) {
    5648           0 :                 ctx->fn(ctx->fn_ctx);
    5649             :         }
    5650             : 
    5651           0 :         free(ctx);
    5652           0 : }
    5653             : 
    5654             : int
    5655           0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
    5656             : {
    5657             :         struct set_nvme_hotplug_ctx *ctx;
    5658             : 
    5659           0 :         if (enabled == true && !spdk_process_is_primary()) {
    5660           0 :                 return -EPERM;
    5661             :         }
    5662             : 
    5663           0 :         ctx = calloc(1, sizeof(*ctx));
    5664           0 :         if (ctx == NULL) {
    5665           0 :                 return -ENOMEM;
    5666             :         }
    5667             : 
    5668           0 :         period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
    5669           0 :         ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
    5670           0 :         ctx->enabled = enabled;
    5671           0 :         ctx->fn = cb;
    5672           0 :         ctx->fn_ctx = cb_ctx;
    5673             : 
    5674           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
    5675           0 :         return 0;
    5676             : }
    5677             : 
    5678             : static void
    5679          45 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
    5680             :                                     struct nvme_async_probe_ctx *ctx)
    5681             : {
    5682             :         struct nvme_ns  *nvme_ns;
    5683             :         struct nvme_bdev        *nvme_bdev;
    5684             :         size_t                  j;
    5685             : 
    5686          45 :         assert(nvme_ctrlr != NULL);
    5687             : 
    5688          45 :         if (ctx->names == NULL) {
    5689           0 :                 ctx->reported_bdevs = 0;
    5690           0 :                 populate_namespaces_cb(ctx, 0);
    5691           0 :                 return;
    5692             :         }
    5693             : 
    5694             :         /*
    5695             :          * Report the new bdevs that were created in this call.
    5696             :          * There can be more than one bdev per NVMe controller.
    5697             :          */
    5698          45 :         j = 0;
    5699          45 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    5700          92 :         while (nvme_ns != NULL) {
    5701          47 :                 nvme_bdev = nvme_ns->bdev;
    5702          47 :                 if (j < ctx->max_bdevs) {
    5703          47 :                         ctx->names[j] = nvme_bdev->disk.name;
    5704          47 :                         j++;
    5705             :                 } else {
    5706           0 :                         SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n",
    5707             :                                     ctx->max_bdevs);
    5708           0 :                         ctx->reported_bdevs = 0;
    5709           0 :                         populate_namespaces_cb(ctx, -ERANGE);
    5710           0 :                         return;
    5711             :                 }
    5712             : 
    5713          47 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    5714             :         }
    5715             : 
    5716          45 :         ctx->reported_bdevs = j;
    5717          45 :         populate_namespaces_cb(ctx, 0);
    5718             : }
    5719             : 
    5720             : static int
    5721           9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    5722             :                                struct spdk_nvme_ctrlr *new_ctrlr,
    5723             :                                struct spdk_nvme_transport_id *trid)
    5724             : {
    5725             :         struct nvme_path_id *tmp_trid;
    5726             : 
    5727           9 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    5728           0 :                 SPDK_ERRLOG("PCIe failover is not supported.\n");
    5729           0 :                 return -ENOTSUP;
    5730             :         }
    5731             : 
    5732             :         /* Currently we only support failover to the same transport type. */
    5733           9 :         if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
    5734           0 :                 SPDK_WARNLOG("Failover from trtype: %s to a different trtype: %s is not supported currently\n",
    5735             :                              spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
    5736             :                              spdk_nvme_transport_id_trtype_str(trid->trtype));
    5737           0 :                 return -EINVAL;
    5738             :         }
    5739             : 
    5740             : 
    5741             :         /* Currently we only support failover to the same NQN. */
    5742           9 :         if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
    5743           0 :                 SPDK_WARNLOG("Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
    5744             :                              nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
    5745           0 :                 return -EINVAL;
    5746             :         }
    5747             : 
    5748             :         /* Skip all the other checks if we've already registered this path. */
    5749          21 :         TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
    5750          12 :                 if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
    5751           0 :                         SPDK_WARNLOG("This path (traddr: %s subnqn: %s) is already registered\n", trid->traddr,
    5752             :                                      trid->subnqn);
    5753           0 :                         return -EEXIST;
    5754             :                 }
    5755             :         }
    5756             : 
    5757           9 :         return 0;
    5758             : }
    5759             : 
    5760             : static int
    5761           9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
    5762             :                                     struct spdk_nvme_ctrlr *new_ctrlr)
    5763             : {
    5764             :         struct nvme_ns *nvme_ns;
    5765             :         struct spdk_nvme_ns *new_ns;
    5766             : 
    5767           9 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    5768           9 :         while (nvme_ns != NULL) {
    5769           0 :                 new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
    5770           0 :                 assert(new_ns != NULL);
    5771             : 
    5772           0 :                 if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
    5773           0 :                         return -EINVAL;
    5774             :                 }
    5775             : 
    5776           0 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    5777             :         }
    5778             : 
    5779           9 :         return 0;
    5780             : }
    5781             : 
    5782             : static int
    5783           9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    5784             :                               struct spdk_nvme_transport_id *trid)
    5785             : {
    5786             :         struct nvme_path_id *active_id, *new_trid, *tmp_trid;
    5787             : 
    5788           9 :         new_trid = calloc(1, sizeof(*new_trid));
    5789           9 :         if (new_trid == NULL) {
    5790           0 :                 return -ENOMEM;
    5791             :         }
    5792           9 :         new_trid->trid = *trid;
    5793             : 
    5794           9 :         active_id = nvme_ctrlr->active_path_id;
    5795           9 :         assert(active_id != NULL);
    5796           9 :         assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
    5797             : 
    5798             :         /* Skip the active trid so that it is not replaced until it has failed. */
    5799           9 :         tmp_trid = TAILQ_NEXT(active_id, link);
    5800           9 :         if (tmp_trid == NULL) {
    5801           6 :                 goto add_tail;
    5802             :         }
    5803             : 
    5804             :         /* It means the trid is failed if its last failed time is non-zero.
    5805             :          * Insert the new alternate trid before any failed trid.
    5806             :          */
    5807           5 :         TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
    5808           3 :                 if (tmp_trid->last_failed_tsc != 0) {
    5809           1 :                         TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
    5810           1 :                         return 0;
    5811             :                 }
    5812             :         }
    5813             : 
    5814           2 : add_tail:
    5815           8 :         TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
    5816           8 :         return 0;
    5817             : }
    5818             : 
    5819             : /* This is the case that a secondary path is added to an existing
    5820             :  * nvme_ctrlr for failover. After checking if it can access the same
    5821             :  * namespaces as the primary path, it is disconnected until failover occurs.
    5822             :  */
    5823             : static int
    5824           9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    5825             :                              struct spdk_nvme_ctrlr *new_ctrlr,
    5826             :                              struct spdk_nvme_transport_id *trid)
    5827             : {
    5828             :         int rc;
    5829             : 
    5830           9 :         assert(nvme_ctrlr != NULL);
    5831             : 
    5832           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5833             : 
    5834           9 :         rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
    5835           9 :         if (rc != 0) {
    5836           0 :                 goto exit;
    5837             :         }
    5838             : 
    5839           9 :         rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
    5840           9 :         if (rc != 0) {
    5841           0 :                 goto exit;
    5842             :         }
    5843             : 
    5844           9 :         rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
    5845             : 
    5846           9 : exit:
    5847           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5848             : 
    5849           9 :         spdk_nvme_detach(new_ctrlr);
    5850             : 
    5851           9 :         return rc;
    5852             : }
    5853             : 
    5854             : static void
    5855          46 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    5856             :                   struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    5857             : {
    5858          46 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    5859             :         struct nvme_async_probe_ctx *ctx;
    5860             :         int rc;
    5861             : 
    5862          46 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
    5863          46 :         ctx->ctrlr_attached = true;
    5864             : 
    5865          46 :         rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
    5866          46 :         if (rc != 0) {
    5867           1 :                 ctx->reported_bdevs = 0;
    5868           1 :                 populate_namespaces_cb(ctx, rc);
    5869             :         }
    5870          46 : }
    5871             : 
    5872             : static void
    5873           4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    5874             :                         struct spdk_nvme_ctrlr *ctrlr,
    5875             :                         const struct spdk_nvme_ctrlr_opts *opts)
    5876             : {
    5877           4 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    5878             :         struct nvme_ctrlr *nvme_ctrlr;
    5879             :         struct nvme_async_probe_ctx *ctx;
    5880             :         int rc;
    5881             : 
    5882           4 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
    5883           4 :         ctx->ctrlr_attached = true;
    5884             : 
    5885           4 :         nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
    5886           4 :         if (nvme_ctrlr) {
    5887           4 :                 rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
    5888             :         } else {
    5889           0 :                 rc = -ENODEV;
    5890             :         }
    5891             : 
    5892           4 :         ctx->reported_bdevs = 0;
    5893           4 :         populate_namespaces_cb(ctx, rc);
    5894           4 : }
    5895             : 
    5896             : static int
    5897          51 : bdev_nvme_async_poll(void *arg)
    5898             : {
    5899          51 :         struct nvme_async_probe_ctx     *ctx = arg;
    5900             :         int                             rc;
    5901             : 
    5902          51 :         rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    5903          51 :         if (spdk_unlikely(rc != -EAGAIN)) {
    5904          51 :                 ctx->probe_done = true;
    5905          51 :                 spdk_poller_unregister(&ctx->poller);
    5906          51 :                 if (!ctx->ctrlr_attached) {
    5907             :                         /* The probe is done, but no controller was attached.
    5908             :                          * That means we had a failure, so report -EIO back to
    5909             :                          * the caller (usually the RPC). populate_namespaces_cb()
    5910             :                          * will take care of freeing the nvme_async_probe_ctx.
    5911             :                          */
    5912           1 :                         ctx->reported_bdevs = 0;
    5913           1 :                         populate_namespaces_cb(ctx, -EIO);
    5914          50 :                 } else if (ctx->namespaces_populated) {
    5915             :                         /* The namespaces for the attached controller were all
    5916             :                          * populated and the response was already sent to the
    5917             :                          * caller (usually the RPC).  So free the context here.
    5918             :                          */
    5919          20 :                         free(ctx);
    5920             :                 }
    5921             :         }
    5922             : 
    5923          51 :         return SPDK_POLLER_BUSY;
    5924             : }
    5925             : 
    5926             : static bool
    5927          28 : bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
    5928             :                 uint32_t reconnect_delay_sec,
    5929             :                 uint32_t fast_io_fail_timeout_sec)
    5930             : {
    5931          28 :         if (ctrlr_loss_timeout_sec < -1) {
    5932           1 :                 SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
    5933           1 :                 return false;
    5934          27 :         } else if (ctrlr_loss_timeout_sec == -1) {
    5935          13 :                 if (reconnect_delay_sec == 0) {
    5936           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
    5937           1 :                         return false;
    5938          12 :                 } else if (fast_io_fail_timeout_sec != 0 &&
    5939             :                            fast_io_fail_timeout_sec < reconnect_delay_sec) {
    5940           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io-fail_timeout_sec.\n");
    5941           1 :                         return false;
    5942             :                 }
    5943          14 :         } else if (ctrlr_loss_timeout_sec != 0) {
    5944          11 :                 if (reconnect_delay_sec == 0) {
    5945           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
    5946           1 :                         return false;
    5947          10 :                 } else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
    5948           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
    5949           1 :                         return false;
    5950           9 :                 } else if (fast_io_fail_timeout_sec != 0) {
    5951           6 :                         if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
    5952           1 :                                 SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
    5953           1 :                                 return false;
    5954           5 :                         } else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
    5955           1 :                                 SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
    5956           1 :                                 return false;
    5957             :                         }
    5958             :                 }
    5959           3 :         } else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
    5960           2 :                 SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
    5961           2 :                 return false;
    5962             :         }
    5963             : 
    5964          19 :         return true;
    5965             : }
    5966             : 
    5967             : int
    5968          51 : bdev_nvme_create(struct spdk_nvme_transport_id *trid,
    5969             :                  const char *base_name,
    5970             :                  const char **names,
    5971             :                  uint32_t count,
    5972             :                  spdk_bdev_create_nvme_fn cb_fn,
    5973             :                  void *cb_ctx,
    5974             :                  struct spdk_nvme_ctrlr_opts *drv_opts,
    5975             :                  struct nvme_ctrlr_opts *bdev_opts,
    5976             :                  bool multipath)
    5977             : {
    5978             :         struct nvme_probe_skip_entry    *entry, *tmp;
    5979             :         struct nvme_async_probe_ctx     *ctx;
    5980             :         spdk_nvme_attach_cb attach_cb;
    5981             : 
    5982             :         /* TODO expand this check to include both the host and target TRIDs.
    5983             :          * Only if both are the same should we fail.
    5984             :          */
    5985          51 :         if (nvme_ctrlr_get(trid) != NULL) {
    5986           0 :                 SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
    5987           0 :                 return -EEXIST;
    5988             :         }
    5989             : 
    5990          51 :         if (bdev_opts != NULL &&
    5991           9 :             !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
    5992             :                             bdev_opts->reconnect_delay_sec,
    5993             :                             bdev_opts->fast_io_fail_timeout_sec)) {
    5994           0 :                 return -EINVAL;
    5995             :         }
    5996             : 
    5997          51 :         ctx = calloc(1, sizeof(*ctx));
    5998          51 :         if (!ctx) {
    5999           0 :                 return -ENOMEM;
    6000             :         }
    6001          51 :         ctx->base_name = base_name;
    6002          51 :         ctx->names = names;
    6003          51 :         ctx->max_bdevs = count;
    6004          51 :         ctx->cb_fn = cb_fn;
    6005          51 :         ctx->cb_ctx = cb_ctx;
    6006          51 :         ctx->trid = *trid;
    6007             : 
    6008          51 :         if (bdev_opts) {
    6009           9 :                 memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    6010             :         } else {
    6011          42 :                 bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
    6012             :         }
    6013             : 
    6014          51 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6015           0 :                 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
    6016           0 :                         if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    6017           0 :                                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    6018           0 :                                 free(entry);
    6019           0 :                                 break;
    6020             :                         }
    6021             :                 }
    6022             :         }
    6023             : 
    6024          51 :         if (drv_opts) {
    6025           0 :                 memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    6026             :         } else {
    6027          51 :                 spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->drv_opts, sizeof(ctx->drv_opts));
    6028             :         }
    6029             : 
    6030          51 :         ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
    6031          51 :         ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
    6032          51 :         ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
    6033          51 :         ctx->drv_opts.disable_read_ana_log_page = true;
    6034          51 :         ctx->drv_opts.transport_tos = g_opts.transport_tos;
    6035             : 
    6036          51 :         if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || multipath) {
    6037          47 :                 attach_cb = connect_attach_cb;
    6038             :         } else {
    6039           4 :                 attach_cb = connect_set_failover_cb;
    6040             :         }
    6041             : 
    6042          51 :         ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
    6043          51 :         if (ctx->probe_ctx == NULL) {
    6044           0 :                 SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
    6045           0 :                 free(ctx);
    6046           0 :                 return -ENODEV;
    6047             :         }
    6048          51 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
    6049             : 
    6050          51 :         return 0;
    6051             : }
    6052             : 
    6053             : struct bdev_nvme_delete_ctx {
    6054             :         char                        *name;
    6055             :         struct nvme_path_id         path_id;
    6056             :         bdev_nvme_delete_done_fn    delete_done;
    6057             :         void                        *delete_done_ctx;
    6058             :         uint64_t                    timeout_ticks;
    6059             :         struct spdk_poller          *poller;
    6060             : };
    6061             : 
    6062             : static void
    6063           2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
    6064             : {
    6065           2 :         if (ctx != NULL) {
    6066           1 :                 free(ctx->name);
    6067           1 :                 free(ctx);
    6068             :         }
    6069           2 : }
    6070             : 
    6071             : static bool
    6072          74 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
    6073             : {
    6074          74 :         if (path_id->trid.trtype != 0) {
    6075          21 :                 if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
    6076           0 :                         if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
    6077           0 :                                 return false;
    6078             :                         }
    6079             :                 } else {
    6080          21 :                         if (path_id->trid.trtype != p->trid.trtype) {
    6081           0 :                                 return false;
    6082             :                         }
    6083             :                 }
    6084             :         }
    6085             : 
    6086          74 :         if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
    6087          21 :                 if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
    6088          11 :                         return false;
    6089             :                 }
    6090             :         }
    6091             : 
    6092          63 :         if (path_id->trid.adrfam != 0) {
    6093           0 :                 if (path_id->trid.adrfam != p->trid.adrfam) {
    6094           0 :                         return false;
    6095             :                 }
    6096             :         }
    6097             : 
    6098          63 :         if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
    6099          10 :                 if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
    6100           0 :                         return false;
    6101             :                 }
    6102             :         }
    6103             : 
    6104          63 :         if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
    6105          10 :                 if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
    6106           0 :                         return false;
    6107             :                 }
    6108             :         }
    6109             : 
    6110          63 :         if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
    6111           0 :                 if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
    6112           0 :                         return false;
    6113             :                 }
    6114             :         }
    6115             : 
    6116          63 :         if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
    6117           0 :                 if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
    6118           0 :                         return false;
    6119             :                 }
    6120             :         }
    6121             : 
    6122          63 :         return true;
    6123             : }
    6124             : 
    6125             : static bool
    6126           2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
    6127             : {
    6128             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    6129             :         struct nvme_ctrlr       *ctrlr;
    6130             :         struct nvme_path_id     *p;
    6131             : 
    6132           2 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    6133           2 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    6134           2 :         if (!nbdev_ctrlr) {
    6135           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6136           1 :                 return false;
    6137             :         }
    6138             : 
    6139           1 :         TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    6140           1 :                 pthread_mutex_lock(&ctrlr->mutex);
    6141           1 :                 TAILQ_FOREACH(p, &ctrlr->trids, link) {
    6142           1 :                         if (nvme_path_id_compare(p, path_id)) {
    6143           1 :                                 pthread_mutex_unlock(&ctrlr->mutex);
    6144           1 :                                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6145           1 :                                 return true;
    6146             :                         }
    6147             :                 }
    6148           0 :                 pthread_mutex_unlock(&ctrlr->mutex);
    6149             :         }
    6150           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6151             : 
    6152           0 :         return false;
    6153             : }
    6154             : /* Poller callback: waits until the path recorded in ctx is gone or ctx->timeout_ticks has passed, then reports through ctx->delete_done and releases ctx. */
    6155             : static int
    6156           2 : bdev_nvme_delete_complete_poll(void *arg)
    6157             : {
    6158           2 :         struct bdev_nvme_delete_ctx     *ctx = arg;
    6159           2 :         int                             rc = 0;
    6160             :         /* While the path still exists and the deadline has not passed, leave the poller registered. */
    6161           2 :         if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
    6162           1 :                 if (ctx->timeout_ticks > spdk_get_ticks()) {
    6163           1 :                         return SPDK_POLLER_BUSY;
    6164             :                 }
    6165             :                 /* Deadline reached with the path still present: complete with a timeout error. */
    6166           0 :                 SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
    6167           0 :                 rc = -ETIMEDOUT;
    6168             :         }
    6169             :         /* Finished either way (path gone or timed out): unregister this poller before completing. */
    6170           1 :         spdk_poller_unregister(&ctx->poller);
    6171             :         /* rc is 0 if the path disappeared, -ETIMEDOUT otherwise. */
    6172           1 :         ctx->delete_done(ctx->delete_done_ctx, rc);
    6173           1 :         free_bdev_nvme_delete_ctx(ctx);
    6174             :         /* SPDK_POLLER_BUSY is returned on the completed path as well as on the waiting path. */
    6175           1 :         return SPDK_POLLER_BUSY;
    6176             : }
    6177             : /* Remove path_id from one nvme_ctrlr. Returns 0 if a path matched, -ENXIO if none did, or the error from the delete/failover helper. */
    6178             : static int
    6179          63 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
    6180             : {
    6181             :         struct nvme_path_id     *p, *t;
    6182             :         spdk_msg_fn             msg_fn;
    6183          63 :         int                     rc = -ENXIO;
    6184             :         /* The ctrlr mutex is held while the trids list is walked/modified and while the *_unsafe helper runs. */
    6185          63 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6186             :         /* Walk from the tail toward the head; the head entry is not handled in the loop but right after it. */
    6187          73 :         TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
    6188          73 :                 if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
    6189          63 :                         break;
    6190             :                 }
    6191             :                 /* Non-head entries that do not match are left in place. */
    6192          10 :                 if (!nvme_path_id_compare(p, path_id)) {
    6193           3 :                         continue;
    6194             :                 }
    6195             : 
    6196             :                 /* We are not using the specified path. */
    6197           7 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
    6198           7 :                 free(p);
    6199           7 :                 rc = 0;
    6200             :         }
    6201             :         /* p is now the head entry, or NULL if the list was empty. If the head does not match, return what the loop found. */
    6202          63 :         if (p == NULL || !nvme_path_id_compare(p, path_id)) {
    6203           8 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6204           8 :                 return rc;
    6205             :         }
    6206             : 
    6207             :         /* If we made it here, then this path is a match! Now we need to remove it. */
    6208             : 
    6209             :         /* This is the active path in use right now. The active path is always the first in the list. */
    6210          55 :         assert(p == nvme_ctrlr->active_path_id);
    6211             :         /* rc from the loop is overwritten below by the helper's result. */
    6212          55 :         if (!TAILQ_NEXT(p, link)) {
    6213             :                 /* The current path is the only path. */
    6214          54 :                 msg_fn = _nvme_ctrlr_destruct;
    6215          54 :                 rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
    6216             :         } else {
    6217             :                 /* There is an alternative path. */
    6218           1 :                 msg_fn = _bdev_nvme_reset_ctrlr;
    6219           1 :                 rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
    6220             :         }
    6221             :         /* The mutex is dropped before msg_fn is sent to the ctrlr's thread. */
    6222          55 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6223             :         /* msg_fn is sent only when the helper returned 0; -EALREADY is reported to the caller as success without sending. */
    6224          55 :         if (rc == 0) {
    6225          55 :                 spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    6226           0 :         } else if (rc == -EALREADY) {
    6227           0 :                 rc = 0;
    6228             :         }
    6229             : 
    6230          55 :         return rc;
    6231             : }
    6232             : /* Delete path_id from every nvme_ctrlr of the nvme_bdev_ctrlr called 'name'; optionally report completion later through delete_done. Returns 0 or a negative errno. */
    6233             : int
    6234          48 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
    6235             :                  bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
    6236             : {
    6237             :         struct nvme_bdev_ctrlr          *nbdev_ctrlr;
    6238             :         struct nvme_ctrlr               *nvme_ctrlr, *tmp_nvme_ctrlr;
    6239          48 :         struct bdev_nvme_delete_ctx     *ctx = NULL;
    6240          48 :         int                             rc = -ENXIO, _rc;
    6241             :         /* name and path_id are both required; delete_done and delete_done_ctx may be NULL. */
    6242          48 :         if (name == NULL || path_id == NULL) {
    6243           0 :                 rc = -EINVAL;
    6244           0 :                 goto exit;
    6245             :         }
    6246             :         /* g_bdev_nvme_mutex is held for the name lookup and the walk over the ctrlrs. */
    6247          48 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    6248             : 
    6249          48 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    6250          48 :         if (nbdev_ctrlr == NULL) {
    6251           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6252             : 
    6253           0 :                 SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
    6254           0 :                 rc = -ENODEV;
    6255           0 :                 goto exit;
    6256             :         }
    6257             :         /* -ENXIO from a ctrlr only means it had no such path; any other error aborts the walk and is returned. */
    6258         111 :         TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
    6259          63 :                 _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
    6260          63 :                 if (_rc < 0 && _rc != -ENXIO) {
    6261           0 :                         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6262           0 :                         rc = _rc;
    6263           0 :                         goto exit;
    6264          63 :                 } else if (_rc == 0) {
    6265             :                         /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
    6266             :                          * was deleted successfully. To remember the successful deletion,
    6267             :                          * overwrite rc only if _rc is zero.
    6268             :                          */
    6269          57 :                         rc = 0;
    6270             :                 }
    6271             :         }
    6272             : 
    6273          48 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6274             :         /* Done if no path matched (rc is still -ENXIO) or the caller did not ask for a completion callback. */
    6275          48 :         if (rc != 0 || delete_done == NULL) {
    6276          47 :                 goto exit;
    6277             :         }
    6278             :         /* Build the context consumed by bdev_nvme_delete_complete_poll(). The deletions above have already been issued at this point. */
    6279           1 :         ctx = calloc(1, sizeof(*ctx));
    6280           1 :         if (ctx == NULL) {
    6281           0 :                 SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
    6282           0 :                 rc = -ENOMEM;
    6283           0 :                 goto exit;
    6284             :         }
    6285             :         /* ctx keeps its own copy of the name and of *path_id. */
    6286           1 :         ctx->name = strdup(name);
    6287           1 :         if (ctx->name == NULL) {
    6288           0 :                 SPDK_ERRLOG("Failed to copy controller name for deletion\n");
    6289           0 :                 rc = -ENOMEM;
    6290           0 :                 goto exit;
    6291             :         }
    6292             :         /* Deadline is ten times the tick rate from now; poller period argument is 1000 (presumably microseconds - confirm against SPDK_POLLER_REGISTER). */
    6293           1 :         ctx->delete_done = delete_done;
    6294           1 :         ctx->delete_done_ctx = delete_done_ctx;
    6295           1 :         ctx->path_id = *path_id;
    6296           1 :         ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
    6297           1 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
    6298           1 :         if (ctx->poller == NULL) {
    6299           0 :                 SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
    6300           0 :                 rc = -ENOMEM;
    6301           0 :                 goto exit;
    6302             :         }
    6303             :         /* NOTE(review): on early failures ctx is still NULL here; assumes free_bdev_nvme_delete_ctx() tolerates NULL - confirm. */
    6304           1 : exit:
    6305          48 :         if (rc != 0) {
    6306           1 :                 free_bdev_nvme_delete_ctx(ctx);
    6307             :         }
    6308             : 
    6309          48 :         return rc;
    6310             : }
    6311             : /* Info-level log line prefixed with ctx's traddr:trsvcid. NOTE(review): the expansion ends with ';' and 'ctx' is not parenthesized. */
    6312             : #define DISCOVERY_INFOLOG(ctx, format, ...) \
    6313             :         SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    6314             : /* Error-level log line prefixed with ctx's traddr:trsvcid. NOTE(review): the expansion ends with ';' and 'ctx' is not parenthesized. */
    6315             : #define DISCOVERY_ERRLOG(ctx, format, ...) \
    6316             :         SPDK_ERRLOG("Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    6317             : /* One entry tracked by a discovery_ctx: a log page record plus the transport ID and driver options used for it. */
    6318             : struct discovery_entry_ctx {
    6319             :         char                                            name[128]; /* controller name passed to bdev_nvme_create() / bdev_nvme_delete() */
    6320             :         struct spdk_nvme_transport_id                   trid; /* built by build_trid_from_log_page_entry() or copied from the caller */
    6321             :         struct spdk_nvme_ctrlr_opts                     drv_opts; /* default ctrlr opts with hostnqn taken from the owning ctx */
    6322             :         struct spdk_nvmf_discovery_log_page_entry       entry; /* copy of the log page record, compared with memcmp() against later pages */
    6323             :         TAILQ_ENTRY(discovery_entry_ctx)                tailq; /* link in ctx->nvm_entry_ctxs or ctx->discovery_entry_ctxs */
    6324             :         struct discovery_ctx                            *ctx; /* owning discovery context */
    6325             : };
    6326             : /* State of one discovery service instance; instances are linked on g_discovery_ctxs through 'tailq'. */
    6327             : struct discovery_ctx {
    6328             :         char                                    *name; /* prefix for generated controller names (name followed by index) */
    6329             :         spdk_bdev_nvme_start_discovery_fn       start_cb_fn; /* invoked once by complete_discovery_start(), then cleared */
    6330             :         spdk_bdev_nvme_stop_discovery_fn        stop_cb_fn; /* stored by stop_discovery() */
    6331             :         void                                    *cb_ctx; /* argument passed to start_cb_fn; also overwritten by stop_discovery() */
    6332             :         struct spdk_nvme_probe_ctx              *probe_ctx;
    6333             :         struct spdk_nvme_detach_ctx             *detach_ctx;
    6334             :         struct spdk_nvme_ctrlr                  *ctrlr; /* discovery controller; failed by remove_discovery_entry() to force a reattach */
    6335             :         struct spdk_nvme_transport_id           trid; /* traddr/trsvcid are printed by DISCOVERY_INFOLOG / DISCOVERY_ERRLOG */
    6336             :         struct discovery_entry_ctx              *entry_ctx_in_use; /* freed and cleared in _stop_discovery() */
    6337             :         struct spdk_poller                      *poller;
    6338             :         struct spdk_nvme_ctrlr_opts             drv_opts;
    6339             :         struct nvme_ctrlr_opts                  bdev_opts; /* passed to bdev_nvme_create() for each discovered NVM subsystem */
    6340             :         struct spdk_nvmf_discovery_log_page     *log_page; /* page being processed; freed in discovery_remove_controllers() and free_discovery_ctx() */
    6341             :         TAILQ_ENTRY(discovery_ctx)              tailq;
    6342             :         TAILQ_HEAD(, discovery_entry_ctx)       nvm_entry_ctxs; /* entries for which bdev_nvme_create() was issued successfully */
    6343             :         TAILQ_HEAD(, discovery_entry_ctx)       discovery_entry_ctxs; /* entries with a discovery subtype; emptied and rebuilt for each log page */
    6344             :         int                                     rc; /* status recorded by complete_discovery_start() */
    6345             :         bool                                    wait_for_attach;
    6346             :         uint64_t                                timeout_ticks; /* reset to 0 by complete_discovery_start() */
    6347             :         /* Denotes that the discovery service is being started. We're waiting
    6348             :          * for the initial connection to the discovery controller to be
    6349             :          * established and attach discovered NVM ctrlrs.
    6350             :          */
    6351             :         bool                                    initializing;
    6352             :         /* Denotes if a discovery is currently in progress for this context.
    6353             :          * That includes connecting to newly discovered subsystems.  Used to
    6354             :          * ensure we do not start a new discovery until an existing one is
    6355             :          * complete.
    6356             :          */
    6357             :         bool                                    in_progress;
    6358             : 
    6359             :         /* Denotes if another discovery is needed after the one in progress
    6360             :          * completes.  Set when we receive an AER completion while a discovery
    6361             :          * is already in progress.
    6362             :          */
    6363             :         bool                                    pending;
    6364             : 
    6365             :         /* Signal to the discovery context poller that it should stop the
    6366             :          * discovery service, including detaching from the current discovery
    6367             :          * controller.
    6368             :          */
    6369             :         bool                                    stop;
    6370             : 
    6371             :         struct spdk_thread                      *calling_thread;
    6372             :         uint32_t                                index; /* next numeric suffix for generated controller names */
    6373             :         uint32_t                                attach_in_progress; /* number of bdev_nvme_create() attaches still outstanding */
    6374             :         char                                    *hostnqn; /* copied into each entry's drv_opts.hostnqn */
    6375             : 
    6376             :         /* Denotes if the discovery service was started by the mdns discovery.
    6377             :          */
    6378             :         bool                                    from_mdns_discovery_service;
    6379             : };
    6380             : /* Global list of discovery contexts; walked by remove_discovery_entry() and discovery_log_page_cb(). */
    6381             : TAILQ_HEAD(discovery_ctxs, discovery_ctx);
    6382             : static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
    6383             : /* Declared ahead of its definition because discovery_complete() calls it. */
    6384             : static void get_discovery_log_page(struct discovery_ctx *ctx);
    6385             : /* Release a discovery_ctx together with its log_page, hostnqn and name buffers. The entry lists are not touched here. */
    6386             : static void
    6387           0 : free_discovery_ctx(struct discovery_ctx *ctx)
    6388             : {
    6389           0 :         free(ctx->log_page);
    6390           0 :         free(ctx->hostnqn);
    6391           0 :         free(ctx->name);
    6392           0 :         free(ctx);
    6393           0 : }
    6394             : /* Mark the current discovery round finished; if another round was requested meanwhile (ctx->pending), start it immediately. */
    6395             : static void
    6396           0 : discovery_complete(struct discovery_ctx *ctx)
    6397             : {
    6398           0 :         ctx->initializing = false;
    6399           0 :         ctx->in_progress = false;
    6400           0 :         if (ctx->pending) {
    6401           0 :                 ctx->pending = false; /* consume the request before fetching the log page again */
    6402           0 :                 get_discovery_log_page(ctx);
    6403             :         }
    6404           0 : }
    6405             : /* Fill trid's trtype, adrfam, traddr, trsvcid and subnqn from a discovery log page entry, cutting each string at its first space. */
    6406             : static void
    6407           0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
    6408             :                                struct spdk_nvmf_discovery_log_page_entry *entry)
    6409             : {
    6410             :         char *space;
    6411             :         /* traddr/trsvcid are copied at the entry's field size - assumes the trid fields are larger and pre-zeroed so a terminator remains; TODO confirm. */
    6412           0 :         trid->trtype = entry->trtype;
    6413           0 :         trid->adrfam = entry->adrfam;
    6414           0 :         memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
    6415           0 :         memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
    6416             :         /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
    6417             :          * before call to this function trid->subnqn is zeroed out, we need
    6418             :          * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
    6419             :          * remains 0. Then we can shorten the string (replace ' ' with 0) if required
    6420             :          */
    6421           0 :         memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
    6422             : 
    6423             :         /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
    6424             :          * But the log page entries typically pad them with spaces, not zeroes.
    6425             :          * So add a NULL terminator to each of these fields at the appropriate
    6426             :          * location.
    6427             :          */
    6428           0 :         space = strchr(trid->traddr, ' ');
    6429           0 :         if (space) {
    6430           0 :                 *space = 0;
    6431             :         }
    6432           0 :         space = strchr(trid->trsvcid, ' ');
    6433           0 :         if (space) {
    6434           0 :                 *space = 0;
    6435             :         }
    6436           0 :         space = strchr(trid->subnqn, ' ');
    6437           0 :         if (space) {
    6438           0 :                 *space = 0;
    6439             :         }
    6440           0 : }
    6441             : /* Tear down a discovery context's entries: set ctx->stop, issue a delete for every NVM entry and free all entry contexts. Re-posts itself while attaches are outstanding. */
    6442             : static void
    6443           0 : _stop_discovery(void *_ctx)
    6444             : {
    6445           0 :         struct discovery_ctx *ctx = _ctx;
    6446             :         /* Attaches still in flight: queue this function again on the current thread and retry later. */
    6447           0 :         if (ctx->attach_in_progress > 0) {
    6448           0 :                 spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
    6449           0 :                 return;
    6450             :         }
    6451             :         /* ctx->stop is only set here; stop_cb_fn is not invoked in this function. */
    6452           0 :         ctx->stop = true;
    6453             :         /* For every NVM entry: request deletion of its path (no completion callback), then unlink and free the entry. */
    6454           0 :         while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
    6455             :                 struct discovery_entry_ctx *entry_ctx;
    6456           0 :                 struct nvme_path_id path = {};
    6457             :                 /* Only path.trid is filled in; the remaining nvme_path_id fields stay zero. The delete result is ignored. */
    6458           0 :                 entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
    6459           0 :                 path.trid = entry_ctx->trid;
    6460           0 :                 bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    6461           0 :                 TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6462           0 :                 free(entry_ctx);
    6463             :         }
    6464             :         /* Entries on discovery_entry_ctxs are only unlinked and freed. */
    6465           0 :         while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
    6466             :                 struct discovery_entry_ctx *entry_ctx;
    6467             : 
    6468           0 :                 entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    6469           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    6470           0 :                 free(entry_ctx);
    6471             :         }
    6472             :         /* The entry currently in use is held outside both lists and is released separately. */
    6473           0 :         free(ctx->entry_ctx_in_use);
    6474           0 :         ctx->entry_ctx_in_use = NULL;
    6475             : }
    6476             : /* Record the stop callback and its argument on ctx, then begin teardown through _stop_discovery(). */
    6477             : static void
    6478           0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    6479             : {
    6480           0 :         ctx->stop_cb_fn = cb_fn;
    6481           0 :         ctx->cb_ctx = cb_ctx;
    6482             :         /* Outstanding attaches are only logged here; _stop_discovery() is what waits for them. */
    6483           0 :         if (ctx->attach_in_progress > 0) {
    6484           0 :                 DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
    6485             :                                   ctx->attach_in_progress);
    6486             :         }
    6487             : 
    6488           0 :         _stop_discovery(ctx);
    6489           0 : }
    6490             : /* Drop, from every discovery context, the NVM entries whose transport ID equals nvme_ctrlr's first path, and fail the owning discovery ctrlr for each one removed. */
    6491             : static void
    6492           2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
    6493             : {
    6494             :         struct discovery_ctx *d_ctx;
    6495             :         struct nvme_path_id *path_id;
    6496           2 :         struct spdk_nvme_transport_id trid = {};
    6497             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    6498             :         /* Only the head of the trids list is compared. NOTE(review): path_id is not NULL-checked - presumably trids is never empty here; confirm. */
    6499           2 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    6500             :         /* The _SAFE iterator is needed because matching entries are unlinked during the walk. */
    6501           2 :         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    6502           0 :                 TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
    6503           0 :                         build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
    6504           0 :                         if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
    6505           0 :                                 continue;
    6506             :                         }
    6507             :                         /* Match: unlink and free the entry; 'trid' is a local copy, so it is still valid for the log line below. */
    6508           0 :                         TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6509           0 :                         free(entry_ctx);
    6510           0 :                         DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
    6511             :                                           trid.subnqn, trid.traddr, trid.trsvcid);
    6512             : 
    6513             :                         /* Fail discovery ctrlr to force reattach attempt */
    6514           0 :                         spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
    6515             :                 }
    6516             :         }
    6517           2 : }
    6518             : /* Compare the tracked NVM entries with ctx->log_page: entries absent from the page are deleted and freed. Then free the page and complete the discovery round. */
    6519             : static void
    6520           0 : discovery_remove_controllers(struct discovery_ctx *ctx)
    6521             : {
    6522           0 :         struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
    6523             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    6524             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    6525           0 :         struct spdk_nvme_transport_id old_trid = {};
    6526             :         uint64_t numrec, i;
    6527             :         bool found;
    6528             :         /* numrec is stored little-endian in the page; an entry survives only if a byte-identical record is present. */
    6529           0 :         numrec = from_le64(&log_page->numrec);
    6530           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
    6531           0 :                 found = false;
    6532           0 :                 old_entry = &entry_ctx->entry;
    6533           0 :                 build_trid_from_log_page_entry(&old_trid, old_entry); /* old_trid is used only for the log messages */
    6534           0 :                 for (i = 0; i < numrec; i++) {
    6535           0 :                         new_entry = &log_page->entries[i];
    6536           0 :                         if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
    6537           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
    6538             :                                                   old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    6539           0 :                                 found = true;
    6540           0 :                                 break;
    6541             :                         }
    6542             :                 }
    6543           0 :                 if (!found) {
    6544           0 :                         struct nvme_path_id path = {};
    6545             :                         /* The record vanished from the page: request deletion of its path and forget the entry. */
    6546           0 :                         DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
    6547             :                                           old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    6548             :                         /* The delete is issued without a completion callback and its return value is ignored. */
    6549           0 :                         path.trid = entry_ctx->trid;
    6550           0 :                         bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    6551           0 :                         TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6552           0 :                         free(entry_ctx);
    6553             :                 }
    6554             :         }
    6555           0 :         free(log_page);
    6556           0 :         ctx->log_page = NULL; /* cleared so the freed page is not referenced or freed again */
    6557           0 :         discovery_complete(ctx);
    6558           0 : }
    6559             : /* Record 'status' in ctx->rc, clear the start timeout, and invoke start_cb_fn (if still set) exactly once. */
    6560             : static void
    6561           0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
    6562             : {
    6563           0 :         ctx->timeout_ticks = 0;
    6564           0 :         ctx->rc = status;
    6565           0 :         if (ctx->start_cb_fn) {
    6566           0 :                 ctx->start_cb_fn(ctx->cb_ctx, status);
    6567           0 :                 ctx->start_cb_fn = NULL; /* later calls become no-ops for the callback */
    6568           0 :                 ctx->cb_ctx = NULL;
    6569             :         }
    6570           0 : }
    6571             : /* Attach completion for one NVM entry. When the last outstanding attach finishes, report start completion and either stop discovery or prune stale controllers. */
    6572             : static void
    6573           0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
    6574             : {
    6575           0 :         struct discovery_entry_ctx *entry_ctx = cb_ctx;
    6576           0 :         struct discovery_ctx *ctx = entry_ctx->ctx;
    6577             :         /* NOTE(review): bdev_count and rc are not used in this function; the status reported below is ctx->rc. */
    6578           0 :         DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
    6579           0 :         ctx->attach_in_progress--;
    6580           0 :         if (ctx->attach_in_progress == 0) {
    6581           0 :                 complete_discovery_start(ctx, ctx->rc);
    6582           0 :                 if (ctx->initializing && ctx->rc != 0) {
    6583           0 :                         DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
    6584           0 :                         stop_discovery(ctx, NULL, ctx->cb_ctx); /* cb_ctx was already cleared above if start_cb_fn was set */
    6585             :                 } else {
    6586           0 :                         discovery_remove_controllers(ctx);
    6587             :                 }
    6588             :         }
    6589           0 : }
    6590             : /* Allocate a zeroed discovery_entry_ctx bound to ctx, holding a copy of *trid and default driver options with ctx->hostnqn. Returns NULL if allocation fails. */
    6591             : static struct discovery_entry_ctx *
    6592           0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
    6593             : {
    6594             :         struct discovery_entry_ctx *new_ctx;
    6595             : 
    6596           0 :         new_ctx = calloc(1, sizeof(*new_ctx));
    6597           0 :         if (new_ctx == NULL) {
    6598           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    6599           0 :                 return NULL;
    6600             :         }
    6601             :         /* 'name' and 'entry' are left zeroed; the new entry is not linked into any list here. */
    6602           0 :         new_ctx->ctx = ctx;
    6603           0 :         memcpy(&new_ctx->trid, trid, sizeof(*trid));
    6604           0 :         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    6605           0 :         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    6606           0 :         return new_ctx;
    6607             : }
    6608             : 
    6609             : static void
    6610           0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
    6611             :                       struct spdk_nvmf_discovery_log_page *log_page)
    6612             : {
    6613           0 :         struct discovery_ctx *ctx = cb_arg;
    6614             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    6615             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    6616             :         uint64_t numrec, i;
    6617             :         bool found;
    6618             : 
    6619           0 :         if (rc || spdk_nvme_cpl_is_error(cpl)) {
    6620           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    6621           0 :                 return;
    6622             :         }
    6623             : 
    6624           0 :         ctx->log_page = log_page;
    6625           0 :         assert(ctx->attach_in_progress == 0);
    6626           0 :         numrec = from_le64(&log_page->numrec);
    6627           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
    6628           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    6629           0 :                 free(entry_ctx);
    6630             :         }
    6631           0 :         for (i = 0; i < numrec; i++) {
    6632           0 :                 found = false;
    6633           0 :                 new_entry = &log_page->entries[i];
    6634           0 :                 if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
    6635           0 :                     new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
    6636             :                         struct discovery_entry_ctx *new_ctx;
    6637           0 :                         struct spdk_nvme_transport_id trid = {};
    6638             : 
    6639           0 :                         build_trid_from_log_page_entry(&trid, new_entry);
    6640           0 :                         new_ctx = create_discovery_entry_ctx(ctx, &trid);
    6641           0 :                         if (new_ctx == NULL) {
    6642           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    6643           0 :                                 break;
    6644             :                         }
    6645             : 
    6646           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
    6647           0 :                         continue;
    6648             :                 }
    6649           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
    6650           0 :                         old_entry = &entry_ctx->entry;
    6651           0 :                         if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
    6652           0 :                                 found = true;
    6653           0 :                                 break;
    6654             :                         }
    6655             :                 }
    6656           0 :                 if (!found) {
    6657           0 :                         struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
    6658             :                         struct discovery_ctx *d_ctx;
    6659             : 
    6660           0 :                         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    6661           0 :                                 TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
    6662           0 :                                         if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
    6663             :                                                     sizeof(new_entry->subnqn))) {
    6664           0 :                                                 break;
    6665             :                                         }
    6666             :                                 }
    6667           0 :                                 if (subnqn_ctx) {
    6668           0 :                                         break;
    6669             :                                 }
    6670             :                         }
    6671             : 
    6672           0 :                         new_ctx = calloc(1, sizeof(*new_ctx));
    6673           0 :                         if (new_ctx == NULL) {
    6674           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    6675           0 :                                 break;
    6676             :                         }
    6677             : 
    6678           0 :                         new_ctx->ctx = ctx;
    6679           0 :                         memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
    6680           0 :                         build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
    6681           0 :                         if (subnqn_ctx) {
    6682           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
    6683           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
    6684             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    6685             :                                                   new_ctx->name);
    6686             :                         } else {
    6687           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
    6688           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
    6689             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    6690             :                                                   new_ctx->name);
    6691             :                         }
    6692           0 :                         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    6693           0 :                         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    6694           0 :                         rc = bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
    6695             :                                               discovery_attach_controller_done, new_ctx,
    6696             :                                               &new_ctx->drv_opts, &ctx->bdev_opts, true);
    6697           0 :                         if (rc == 0) {
    6698           0 :                                 TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
    6699           0 :                                 ctx->attach_in_progress++;
    6700             :                         } else {
    6701           0 :                                 DISCOVERY_ERRLOG(ctx, "bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
    6702             :                         }
    6703             :                 }
    6704             :         }
    6705             : 
    6706           0 :         if (ctx->attach_in_progress == 0) {
    6707           0 :                 discovery_remove_controllers(ctx);
    6708             :         }
    6709             : }
    6710             : 
    6711             : static void
    6712           0 : get_discovery_log_page(struct discovery_ctx *ctx)
    6713             : {
    6714             :         int rc;
    6715             : 
    6716           0 :         assert(ctx->in_progress == false);
    6717           0 :         ctx->in_progress = true;
    6718           0 :         rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
    6719           0 :         if (rc != 0) {
    6720           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    6721             :         }
    6722           0 :         DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
    6723           0 : }
    6724             : 
    6725             : static void
    6726           0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    6727             : {
    6728           0 :         struct discovery_ctx *ctx = arg;
    6729           0 :         uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
    6730             : 
    6731           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    6732           0 :                 DISCOVERY_ERRLOG(ctx, "aer failed\n");
    6733           0 :                 return;
    6734             :         }
    6735             : 
    6736           0 :         if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
    6737           0 :                 DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
    6738           0 :                 return;
    6739             :         }
    6740             : 
    6741           0 :         DISCOVERY_INFOLOG(ctx, "got aer\n");
    6742           0 :         if (ctx->in_progress) {
    6743           0 :                 ctx->pending = true;
    6744           0 :                 return;
    6745             :         }
    6746             : 
    6747           0 :         get_discovery_log_page(ctx);
    6748             : }
    6749             : 
    6750             : static void
    6751           0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6752             :                     struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    6753             : {
    6754           0 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6755             :         struct discovery_ctx *ctx;
    6756             : 
    6757           0 :         ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
    6758             : 
    6759           0 :         DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
    6760           0 :         ctx->probe_ctx = NULL;
    6761           0 :         ctx->ctrlr = ctrlr;
    6762             : 
    6763           0 :         if (ctx->rc != 0) {
    6764           0 :                 DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
    6765             :                                  ctx->rc);
    6766           0 :                 return;
    6767             :         }
    6768             : 
    6769           0 :         spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
    6770             : }
    6771             : 
    6772             : static int
    6773           0 : discovery_poller(void *arg)
    6774             : {
    6775           0 :         struct discovery_ctx *ctx = arg;
    6776             :         struct spdk_nvme_transport_id *trid;
    6777             :         int rc;
    6778             : 
    6779           0 :         if (ctx->detach_ctx) {
    6780           0 :                 rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
    6781           0 :                 if (rc != -EAGAIN) {
    6782           0 :                         ctx->detach_ctx = NULL;
    6783           0 :                         ctx->ctrlr = NULL;
    6784             :                 }
    6785           0 :         } else if (ctx->stop) {
    6786           0 :                 if (ctx->ctrlr != NULL) {
    6787           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    6788           0 :                         if (rc == 0) {
    6789           0 :                                 return SPDK_POLLER_BUSY;
    6790             :                         }
    6791           0 :                         DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    6792             :                 }
    6793           0 :                 spdk_poller_unregister(&ctx->poller);
    6794           0 :                 TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    6795           0 :                 assert(ctx->start_cb_fn == NULL);
    6796           0 :                 if (ctx->stop_cb_fn != NULL) {
    6797           0 :                         ctx->stop_cb_fn(ctx->cb_ctx);
    6798             :                 }
    6799           0 :                 free_discovery_ctx(ctx);
    6800           0 :         } else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
    6801           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    6802           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    6803           0 :                         assert(ctx->initializing);
    6804           0 :                         spdk_poller_unregister(&ctx->poller);
    6805           0 :                         TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    6806           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    6807           0 :                         stop_discovery(ctx, NULL, NULL);
    6808           0 :                         free_discovery_ctx(ctx);
    6809           0 :                         return SPDK_POLLER_BUSY;
    6810             :                 }
    6811             : 
    6812           0 :                 assert(ctx->entry_ctx_in_use == NULL);
    6813           0 :                 ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    6814           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    6815           0 :                 trid = &ctx->entry_ctx_in_use->trid;
    6816           0 :                 ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
    6817           0 :                 if (ctx->probe_ctx) {
    6818           0 :                         spdk_poller_unregister(&ctx->poller);
    6819           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
    6820             :                 } else {
    6821           0 :                         DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
    6822           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    6823           0 :                         ctx->entry_ctx_in_use = NULL;
    6824             :                 }
    6825           0 :         } else if (ctx->probe_ctx) {
    6826           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    6827           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    6828           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    6829           0 :                         return SPDK_POLLER_BUSY;
    6830             :                 }
    6831             : 
    6832           0 :                 rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    6833           0 :                 if (rc != -EAGAIN) {
    6834           0 :                         if (ctx->rc != 0) {
    6835           0 :                                 assert(ctx->initializing);
    6836           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    6837             :                         } else {
    6838           0 :                                 assert(rc == 0);
    6839           0 :                                 DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
    6840           0 :                                 ctx->rc = rc;
    6841           0 :                                 get_discovery_log_page(ctx);
    6842             :                         }
    6843             :                 }
    6844             :         } else {
    6845           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    6846           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
    6847           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    6848             :                         /* We need to wait until all NVM ctrlrs are attached before we stop the
    6849             :                          * discovery service to make sure we don't detach a ctrlr that is still
    6850             :                          * being attached.
    6851             :                          */
    6852           0 :                         if (ctx->attach_in_progress == 0) {
    6853           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    6854           0 :                                 return SPDK_POLLER_BUSY;
    6855             :                         }
    6856             :                 }
    6857             : 
    6858           0 :                 rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
    6859           0 :                 if (rc < 0) {
    6860           0 :                         spdk_poller_unregister(&ctx->poller);
    6861           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    6862           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    6863           0 :                         ctx->entry_ctx_in_use = NULL;
    6864             : 
    6865           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    6866           0 :                         if (rc != 0) {
    6867           0 :                                 DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    6868           0 :                                 ctx->ctrlr = NULL;
    6869             :                         }
    6870             :                 }
    6871             :         }
    6872             : 
    6873           0 :         return SPDK_POLLER_BUSY;
    6874             : }
    6875             : 
    6876             : static void
    6877           0 : start_discovery_poller(void *arg)
    6878             : {
    6879           0 :         struct discovery_ctx *ctx = arg;
    6880             : 
    6881           0 :         TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
    6882           0 :         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    6883           0 : }
    6884             : 
    6885             : int
    6886           0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
    6887             :                           const char *base_name,
    6888             :                           struct spdk_nvme_ctrlr_opts *drv_opts,
    6889             :                           struct nvme_ctrlr_opts *bdev_opts,
    6890             :                           uint64_t attach_timeout,
    6891             :                           bool from_mdns,
    6892             :                           spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
    6893             : {
    6894             :         struct discovery_ctx *ctx;
    6895             :         struct discovery_entry_ctx *discovery_entry_ctx;
    6896             : 
    6897           0 :         snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
    6898           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    6899           0 :                 if (strcmp(ctx->name, base_name) == 0) {
    6900           0 :                         return -EEXIST;
    6901             :                 }
    6902             : 
    6903           0 :                 if (ctx->entry_ctx_in_use != NULL) {
    6904           0 :                         if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
    6905           0 :                                 return -EEXIST;
    6906             :                         }
    6907             :                 }
    6908             : 
    6909           0 :                 TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    6910           0 :                         if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
    6911           0 :                                 return -EEXIST;
    6912             :                         }
    6913             :                 }
    6914             :         }
    6915             : 
    6916           0 :         ctx = calloc(1, sizeof(*ctx));
    6917           0 :         if (ctx == NULL) {
    6918           0 :                 return -ENOMEM;
    6919             :         }
    6920             : 
    6921           0 :         ctx->name = strdup(base_name);
    6922           0 :         if (ctx->name == NULL) {
    6923           0 :                 free_discovery_ctx(ctx);
    6924           0 :                 return -ENOMEM;
    6925             :         }
    6926           0 :         memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    6927           0 :         memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    6928           0 :         ctx->from_mdns_discovery_service = from_mdns;
    6929           0 :         ctx->bdev_opts.from_discovery_service = true;
    6930           0 :         ctx->calling_thread = spdk_get_thread();
    6931           0 :         ctx->start_cb_fn = cb_fn;
    6932           0 :         ctx->cb_ctx = cb_ctx;
    6933           0 :         ctx->initializing = true;
    6934           0 :         if (ctx->start_cb_fn) {
    6935             :                 /* We can use this when dumping json to denote if this RPC parameter
    6936             :                  * was specified or not.
    6937             :                  */
    6938           0 :                 ctx->wait_for_attach = true;
    6939             :         }
    6940           0 :         if (attach_timeout != 0) {
    6941           0 :                 ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
    6942           0 :                                      spdk_get_ticks_hz() / 1000ull;
    6943             :         }
    6944           0 :         TAILQ_INIT(&ctx->nvm_entry_ctxs);
    6945           0 :         TAILQ_INIT(&ctx->discovery_entry_ctxs);
    6946           0 :         memcpy(&ctx->trid, trid, sizeof(*trid));
    6947             :         /* Even if user did not specify hostnqn, we can still strdup("\0"); */
    6948           0 :         ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
    6949           0 :         if (ctx->hostnqn == NULL) {
    6950           0 :                 free_discovery_ctx(ctx);
    6951           0 :                 return -ENOMEM;
    6952             :         }
    6953           0 :         discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
    6954           0 :         if (discovery_entry_ctx == NULL) {
    6955           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    6956           0 :                 free_discovery_ctx(ctx);
    6957           0 :                 return -ENOMEM;
    6958             :         }
    6959             : 
    6960           0 :         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
    6961           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
    6962           0 :         return 0;
    6963             : }
    6964             : 
    6965             : int
    6966           0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    6967             : {
    6968             :         struct discovery_ctx *ctx;
    6969             : 
    6970           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    6971           0 :                 if (strcmp(name, ctx->name) == 0) {
    6972           0 :                         if (ctx->stop) {
    6973           0 :                                 return -EALREADY;
    6974             :                         }
    6975             :                         /* If we're still starting the discovery service and ->rc is non-zero, we're
    6976             :                          * going to stop it as soon as we can
    6977             :                          */
    6978           0 :                         if (ctx->initializing && ctx->rc != 0) {
    6979           0 :                                 return -EALREADY;
    6980             :                         }
    6981           0 :                         stop_discovery(ctx, cb_fn, cb_ctx);
    6982           0 :                         return 0;
    6983             :                 }
    6984             :         }
    6985             : 
    6986           0 :         return -ENOENT;
    6987             : }
    6988             : 
    6989             : static int
    6990           1 : bdev_nvme_library_init(void)
    6991             : {
    6992           1 :         g_bdev_nvme_init_thread = spdk_get_thread();
    6993             : 
    6994           1 :         spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
    6995             :                                 bdev_nvme_destroy_poll_group_cb,
    6996             :                                 sizeof(struct nvme_poll_group),  "nvme_poll_groups");
    6997             : 
    6998           1 :         return 0;
    6999             : }
    7000             : 
    7001             : static void
    7002           1 : bdev_nvme_fini_destruct_ctrlrs(void)
    7003             : {
    7004             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    7005             :         struct nvme_ctrlr *nvme_ctrlr;
    7006             : 
    7007           1 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    7008           1 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    7009           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    7010           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    7011           0 :                         if (nvme_ctrlr->destruct) {
    7012             :                                 /* This controller's destruction was already started
    7013             :                                  * before the application started shutting down
    7014             :                                  */
    7015           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7016           0 :                                 continue;
    7017             :                         }
    7018           0 :                         nvme_ctrlr->destruct = true;
    7019           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7020             : 
    7021           0 :                         spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
    7022             :                                              nvme_ctrlr);
    7023             :                 }
    7024             :         }
    7025             : 
    7026           1 :         g_bdev_nvme_module_finish = true;
    7027           1 :         if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    7028           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7029           1 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
    7030           1 :                 spdk_bdev_module_fini_done();
    7031           1 :                 return;
    7032             :         }
    7033             : 
    7034           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7035             : }
    7036             : 
    7037             : static void
    7038           0 : check_discovery_fini(void *arg)
    7039             : {
    7040           0 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7041           0 :                 bdev_nvme_fini_destruct_ctrlrs();
    7042             :         }
    7043           0 : }
    7044             : 
    7045             : static void
    7046           1 : bdev_nvme_library_fini(void)
    7047             : {
    7048             :         struct nvme_probe_skip_entry *entry, *entry_tmp;
    7049             :         struct discovery_ctx *ctx;
    7050             : 
    7051           1 :         spdk_poller_unregister(&g_hotplug_poller);
    7052           1 :         free(g_hotplug_probe_ctx);
    7053           1 :         g_hotplug_probe_ctx = NULL;
    7054             : 
    7055           1 :         TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
    7056           0 :                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    7057           0 :                 free(entry);
    7058             :         }
    7059             : 
    7060           1 :         assert(spdk_get_thread() == g_bdev_nvme_init_thread);
    7061           1 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7062           1 :                 bdev_nvme_fini_destruct_ctrlrs();
    7063             :         } else {
    7064           0 :                 TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7065           0 :                         stop_discovery(ctx, check_discovery_fini, NULL);
    7066             :                 }
    7067             :         }
    7068           1 : }
    7069             : 
    7070             : static void
    7071           0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
    7072             : {
    7073           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7074           0 :         struct spdk_bdev *bdev = bdev_io->bdev;
    7075           0 :         struct spdk_dif_ctx dif_ctx;
    7076           0 :         struct spdk_dif_error err_blk = {};
    7077             :         int rc;
    7078           0 :         struct spdk_dif_ctx_init_ext_opts dif_opts;
    7079             : 
    7080           0 :         dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
    7081           0 :         dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
    7082           0 :         rc = spdk_dif_ctx_init(&dif_ctx,
    7083           0 :                                bdev->blocklen, bdev->md_len, bdev->md_interleave,
    7084           0 :                                bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
    7085           0 :                                bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
    7086           0 :         if (rc != 0) {
    7087           0 :                 SPDK_ERRLOG("Initialization of DIF context failed\n");
    7088           0 :                 return;
    7089             :         }
    7090             : 
    7091           0 :         if (bdev->md_interleave) {
    7092           0 :                 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7093           0 :                                      bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7094             :         } else {
    7095           0 :                 struct iovec md_iov = {
    7096           0 :                         .iov_base       = bdev_io->u.bdev.md_buf,
    7097           0 :                         .iov_len        = bdev_io->u.bdev.num_blocks * bdev->md_len,
    7098             :                 };
    7099             : 
    7100           0 :                 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7101           0 :                                      &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7102             :         }
    7103             : 
    7104           0 :         if (rc != 0) {
    7105           0 :                 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
    7106             :                             err_blk.err_type, err_blk.err_offset);
    7107             :         } else {
    7108           0 :                 SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
    7109             :         }
    7110             : }
    7111             : 
    7112             : static void
    7113           0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7114             : {
    7115           0 :         struct nvme_bdev_io *bio = ref;
    7116             : 
    7117           0 :         if (spdk_nvme_cpl_is_success(cpl)) {
    7118             :                 /* Run PI verification for read data buffer. */
    7119           0 :                 bdev_nvme_verify_pi_error(bio);
    7120             :         }
    7121             : 
    7122             :         /* Return original completion status */
    7123           0 :         bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    7124           0 : }
    7125             : 
    7126             : static void
    7127           3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7128             : {
    7129           3 :         struct nvme_bdev_io *bio = ref;
    7130           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7131             :         int ret;
    7132             : 
    7133           3 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7134           0 :                 SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
    7135             :                             cpl->status.sct, cpl->status.sc);
    7136             : 
    7137             :                 /* Save completion status to use after verifying PI error. */
    7138           0 :                 bio->cpl = *cpl;
    7139             : 
    7140           0 :                 if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
    7141             :                         /* Read without PI checking to verify PI error. */
    7142           0 :                         ret = bdev_nvme_no_pi_readv(bio,
    7143             :                                                     bdev_io->u.bdev.iovs,
    7144             :                                                     bdev_io->u.bdev.iovcnt,
    7145             :                                                     bdev_io->u.bdev.md_buf,
    7146             :                                                     bdev_io->u.bdev.num_blocks,
    7147             :                                                     bdev_io->u.bdev.offset_blocks);
    7148           0 :                         if (ret == 0) {
    7149           0 :                                 return;
    7150             :                         }
    7151             :                 }
    7152             :         }
    7153             : 
    7154           3 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7155             : }
    7156             : 
    7157             : static void
    7158          25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7159             : {
    7160          25 :         struct nvme_bdev_io *bio = ref;
    7161             : 
    7162          25 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7163           0 :                 SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
    7164             :                             cpl->status.sct, cpl->status.sc);
    7165             :                 /* Run PI verification for write data buffer if PI error is detected. */
    7166           0 :                 bdev_nvme_verify_pi_error(bio);
    7167             :         }
    7168             : 
    7169          25 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7170          25 : }
    7171             : 
    7172             : static void
    7173           0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7174             : {
    7175           0 :         struct nvme_bdev_io *bio = ref;
    7176           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7177             : 
    7178             :         /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
    7179             :          * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
    7180             :          */
    7181           0 :         bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
    7182             : 
    7183           0 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7184           0 :                 SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
    7185             :                             cpl->status.sct, cpl->status.sc);
    7186             :                 /* Run PI verification for zone append data buffer if PI error is detected. */
    7187           0 :                 bdev_nvme_verify_pi_error(bio);
    7188             :         }
    7189             : 
    7190           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7191           0 : }
    7192             : 
    7193             : static void
    7194           1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7195             : {
    7196           1 :         struct nvme_bdev_io *bio = ref;
    7197             : 
    7198           1 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7199           0 :                 SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
    7200             :                             cpl->status.sct, cpl->status.sc);
    7201             :                 /* Run PI verification for compare data buffer if PI error is detected. */
    7202           0 :                 bdev_nvme_verify_pi_error(bio);
    7203             :         }
    7204             : 
    7205           1 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7206           1 : }
    7207             : 
    7208             : static void
    7209           4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7210             : {
    7211           4 :         struct nvme_bdev_io *bio = ref;
    7212             : 
    7213             :         /* Compare operation completion */
    7214           4 :         if (!bio->first_fused_completed) {
    7215             :                 /* Save compare result for write callback */
    7216           2 :                 bio->cpl = *cpl;
    7217           2 :                 bio->first_fused_completed = true;
    7218           2 :                 return;
    7219             :         }
    7220             : 
    7221             :         /* Write operation completion */
    7222           2 :         if (spdk_nvme_cpl_is_error(&bio->cpl)) {
    7223             :                 /* If bio->cpl is already an error, it means the compare operation failed.  In that case,
    7224             :                  * complete the IO with the compare operation's status.
    7225             :                  */
    7226           1 :                 if (!spdk_nvme_cpl_is_error(cpl)) {
    7227           1 :                         SPDK_ERRLOG("Unexpected write success after compare failure.\n");
    7228             :                 }
    7229             : 
    7230           1 :                 bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    7231             :         } else {
    7232           1 :                 bdev_nvme_io_complete_nvme_status(bio, cpl);
    7233             :         }
    7234             : }
    7235             : 
    7236             : static void
    7237           1 : bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7238             : {
    7239           1 :         struct nvme_bdev_io *bio = ref;
    7240             : 
    7241           1 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7242           1 : }
    7243             : 
    7244             : static int
    7245           0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
    7246             : {
    7247           0 :         switch (desc->zt) {
    7248           0 :         case SPDK_NVME_ZONE_TYPE_SEQWR:
    7249           0 :                 info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
    7250           0 :                 break;
    7251           0 :         default:
    7252           0 :                 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
    7253           0 :                 return -EIO;
    7254             :         }
    7255             : 
    7256           0 :         switch (desc->zs) {
    7257           0 :         case SPDK_NVME_ZONE_STATE_EMPTY:
    7258           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
    7259           0 :                 break;
    7260           0 :         case SPDK_NVME_ZONE_STATE_IOPEN:
    7261           0 :                 info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
    7262           0 :                 break;
    7263           0 :         case SPDK_NVME_ZONE_STATE_EOPEN:
    7264           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
    7265           0 :                 break;
    7266           0 :         case SPDK_NVME_ZONE_STATE_CLOSED:
    7267           0 :                 info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
    7268           0 :                 break;
    7269           0 :         case SPDK_NVME_ZONE_STATE_RONLY:
    7270           0 :                 info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
    7271           0 :                 break;
    7272           0 :         case SPDK_NVME_ZONE_STATE_FULL:
    7273           0 :                 info->state = SPDK_BDEV_ZONE_STATE_FULL;
    7274           0 :                 break;
    7275           0 :         case SPDK_NVME_ZONE_STATE_OFFLINE:
    7276           0 :                 info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
    7277           0 :                 break;
    7278           0 :         default:
    7279           0 :                 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
    7280           0 :                 return -EIO;
    7281             :         }
    7282             : 
    7283           0 :         info->zone_id = desc->zslba;
    7284           0 :         info->write_pointer = desc->wp;
    7285           0 :         info->capacity = desc->zcap;
    7286             : 
    7287           0 :         return 0;
    7288             : }
    7289             : 
    7290             : static void
    7291           0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7292             : {
    7293           0 :         struct nvme_bdev_io *bio = ref;
    7294           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7295           0 :         uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
    7296           0 :         uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
    7297           0 :         struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
    7298             :         uint64_t max_zones_per_buf, i;
    7299             :         uint32_t zone_report_bufsize;
    7300             :         struct spdk_nvme_ns *ns;
    7301             :         struct spdk_nvme_qpair *qpair;
    7302             :         int ret;
    7303             : 
    7304           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    7305           0 :                 goto out_complete_io_nvme_cpl;
    7306             :         }
    7307             : 
    7308           0 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    7309           0 :                 ret = -ENXIO;
    7310           0 :                 goto out_complete_io_ret;
    7311             :         }
    7312             : 
    7313           0 :         ns = bio->io_path->nvme_ns->ns;
    7314           0 :         qpair = bio->io_path->qpair->qpair;
    7315             : 
    7316           0 :         zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    7317           0 :         max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
    7318             :                             sizeof(bio->zone_report_buf->descs[0]);
    7319             : 
    7320           0 :         if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
    7321           0 :                 ret = -EINVAL;
    7322           0 :                 goto out_complete_io_ret;
    7323             :         }
    7324             : 
    7325           0 :         if (!bio->zone_report_buf->nr_zones) {
    7326           0 :                 ret = -EINVAL;
    7327           0 :                 goto out_complete_io_ret;
    7328             :         }
    7329             : 
    7330           0 :         for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
    7331           0 :                 ret = fill_zone_from_report(&info[bio->handled_zones],
    7332           0 :                                             &bio->zone_report_buf->descs[i]);
    7333           0 :                 if (ret) {
    7334           0 :                         goto out_complete_io_ret;
    7335             :                 }
    7336           0 :                 bio->handled_zones++;
    7337             :         }
    7338             : 
    7339           0 :         if (bio->handled_zones < zones_to_copy) {
    7340           0 :                 uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    7341           0 :                 uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
    7342             : 
    7343           0 :                 memset(bio->zone_report_buf, 0, zone_report_bufsize);
    7344           0 :                 ret = spdk_nvme_zns_report_zones(ns, qpair,
    7345           0 :                                                  bio->zone_report_buf, zone_report_bufsize,
    7346             :                                                  slba, SPDK_NVME_ZRA_LIST_ALL, true,
    7347             :                                                  bdev_nvme_get_zone_info_done, bio);
    7348           0 :                 if (!ret) {
    7349           0 :                         return;
    7350             :                 } else {
    7351           0 :                         goto out_complete_io_ret;
    7352             :                 }
    7353             :         }
    7354             : 
    7355           0 : out_complete_io_nvme_cpl:
    7356           0 :         free(bio->zone_report_buf);
    7357           0 :         bio->zone_report_buf = NULL;
    7358           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7359           0 :         return;
    7360             : 
    7361           0 : out_complete_io_ret:
    7362           0 :         free(bio->zone_report_buf);
    7363           0 :         bio->zone_report_buf = NULL;
    7364           0 :         bdev_nvme_io_complete(bio, ret);
    7365             : }
    7366             : 
    7367             : static void
    7368           0 : bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7369             : {
    7370           0 :         struct nvme_bdev_io *bio = ref;
    7371             : 
    7372           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7373           0 : }
    7374             : 
    7375             : static void
    7376           4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
    7377             : {
    7378           4 :         struct nvme_bdev_io *bio = ctx;
    7379           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7380           4 :         const struct spdk_nvme_cpl *cpl = &bio->cpl;
    7381             : 
    7382           4 :         assert(bdev_nvme_io_type_is_admin(bdev_io->type));
    7383             : 
    7384           4 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    7385           4 : }
    7386             : 
    7387             : static void
    7388           3 : bdev_nvme_abort_complete(void *ctx)
    7389             : {
    7390           3 :         struct nvme_bdev_io *bio = ctx;
    7391           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7392             : 
    7393           3 :         if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
    7394           3 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
    7395             :         } else {
    7396           0 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
    7397             :         }
    7398           3 : }
    7399             : 
    7400             : static void
    7401           3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7402             : {
    7403           3 :         struct nvme_bdev_io *bio = ref;
    7404           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7405             : 
    7406           3 :         bio->cpl = *cpl;
    7407           3 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
    7408           3 : }
    7409             : 
    7410             : static void
    7411           4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7412             : {
    7413           4 :         struct nvme_bdev_io *bio = ref;
    7414           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7415             : 
    7416           4 :         bio->cpl = *cpl;
    7417           4 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
    7418             :                              bdev_nvme_admin_passthru_complete_nvme_status, bio);
    7419           4 : }
    7420             : 
    7421             : static void
    7422           0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
    7423             : {
    7424           0 :         struct nvme_bdev_io *bio = ref;
    7425             :         struct iovec *iov;
    7426             : 
    7427           0 :         bio->iov_offset = sgl_offset;
    7428           0 :         for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
    7429           0 :                 iov = &bio->iovs[bio->iovpos];
    7430           0 :                 if (bio->iov_offset < iov->iov_len) {
    7431           0 :                         break;
    7432             :                 }
    7433             : 
    7434           0 :                 bio->iov_offset -= iov->iov_len;
    7435             :         }
    7436           0 : }
    7437             : 
    7438             : static int
    7439           0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
    7440             : {
    7441           0 :         struct nvme_bdev_io *bio = ref;
    7442             :         struct iovec *iov;
    7443             : 
    7444           0 :         assert(bio->iovpos < bio->iovcnt);
    7445             : 
    7446           0 :         iov = &bio->iovs[bio->iovpos];
    7447             : 
    7448           0 :         *address = iov->iov_base;
    7449           0 :         *length = iov->iov_len;
    7450             : 
    7451           0 :         if (bio->iov_offset) {
    7452           0 :                 assert(bio->iov_offset <= iov->iov_len);
    7453           0 :                 *address += bio->iov_offset;
    7454           0 :                 *length -= bio->iov_offset;
    7455             :         }
    7456             : 
    7457           0 :         bio->iov_offset += *length;
    7458           0 :         if (bio->iov_offset == iov->iov_len) {
    7459           0 :                 bio->iovpos++;
    7460           0 :                 bio->iov_offset = 0;
    7461             :         }
    7462             : 
    7463           0 :         return 0;
    7464             : }
    7465             : 
    7466             : static void
    7467           0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
    7468             : {
    7469           0 :         struct nvme_bdev_io *bio = ref;
    7470             :         struct iovec *iov;
    7471             : 
    7472           0 :         bio->fused_iov_offset = sgl_offset;
    7473           0 :         for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
    7474           0 :                 iov = &bio->fused_iovs[bio->fused_iovpos];
    7475           0 :                 if (bio->fused_iov_offset < iov->iov_len) {
    7476           0 :                         break;
    7477             :                 }
    7478             : 
    7479           0 :                 bio->fused_iov_offset -= iov->iov_len;
    7480             :         }
    7481           0 : }
    7482             : 
    7483             : static int
    7484           0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
    7485             : {
    7486           0 :         struct nvme_bdev_io *bio = ref;
    7487             :         struct iovec *iov;
    7488             : 
    7489           0 :         assert(bio->fused_iovpos < bio->fused_iovcnt);
    7490             : 
    7491           0 :         iov = &bio->fused_iovs[bio->fused_iovpos];
    7492             : 
    7493           0 :         *address = iov->iov_base;
    7494           0 :         *length = iov->iov_len;
    7495             : 
    7496           0 :         if (bio->fused_iov_offset) {
    7497           0 :                 assert(bio->fused_iov_offset <= iov->iov_len);
    7498           0 :                 *address += bio->fused_iov_offset;
    7499           0 :                 *length -= bio->fused_iov_offset;
    7500             :         }
    7501             : 
    7502           0 :         bio->fused_iov_offset += *length;
    7503           0 :         if (bio->fused_iov_offset == iov->iov_len) {
    7504           0 :                 bio->fused_iovpos++;
    7505           0 :                 bio->fused_iov_offset = 0;
    7506             :         }
    7507             : 
    7508           0 :         return 0;
    7509             : }
    7510             : 
    7511             : static int
    7512           0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7513             :                       void *md, uint64_t lba_count, uint64_t lba)
    7514             : {
    7515             :         int rc;
    7516             : 
    7517           0 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
    7518             :                       lba_count, lba);
    7519             : 
    7520           0 :         bio->iovs = iov;
    7521           0 :         bio->iovcnt = iovcnt;
    7522           0 :         bio->iovpos = 0;
    7523           0 :         bio->iov_offset = 0;
    7524             : 
    7525           0 :         rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
    7526           0 :                                             bio->io_path->qpair->qpair,
    7527             :                                             lba, lba_count,
    7528             :                                             bdev_nvme_no_pi_readv_done, bio, 0,
    7529             :                                             bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    7530             :                                             md, 0, 0);
    7531             : 
    7532           0 :         if (rc != 0 && rc != -ENOMEM) {
    7533           0 :                 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
    7534             :         }
    7535           0 :         return rc;
    7536             : }
    7537             : 
    7538             : static int
    7539           3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7540             :                 void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    7541             :                 struct spdk_memory_domain *domain, void *domain_ctx,
    7542             :                 struct spdk_accel_sequence *seq)
    7543             : {
    7544           3 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7545           3 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7546             :         int rc;
    7547             : 
    7548           3 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    7549             :                       lba_count, lba);
    7550             : 
    7551           3 :         bio->iovs = iov;
    7552           3 :         bio->iovcnt = iovcnt;
    7553           3 :         bio->iovpos = 0;
    7554           3 :         bio->iov_offset = 0;
    7555             : 
    7556           3 :         if (domain != NULL || seq != NULL) {
    7557           1 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    7558           1 :                 bio->ext_opts.memory_domain = domain;
    7559           1 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    7560           1 :                 bio->ext_opts.io_flags = flags;
    7561           1 :                 bio->ext_opts.metadata = md;
    7562           1 :                 bio->ext_opts.accel_sequence = seq;
    7563             : 
    7564           1 :                 rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
    7565             :                                                 bdev_nvme_readv_done, bio,
    7566             :                                                 bdev_nvme_queued_reset_sgl,
    7567             :                                                 bdev_nvme_queued_next_sge,
    7568             :                                                 &bio->ext_opts);
    7569           2 :         } else if (iovcnt == 1) {
    7570           2 :                 rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
    7571             :                                                    md, lba, lba_count, bdev_nvme_readv_done,
    7572             :                                                    bio, flags, 0, 0);
    7573             :         } else {
    7574           0 :                 rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
    7575             :                                                     bdev_nvme_readv_done, bio, flags,
    7576             :                                                     bdev_nvme_queued_reset_sgl,
    7577             :                                                     bdev_nvme_queued_next_sge, md, 0, 0);
    7578             :         }
    7579             : 
    7580           3 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    7581           0 :                 SPDK_ERRLOG("readv failed: rc = %d\n", rc);
    7582             :         }
    7583           3 :         return rc;
    7584             : }
    7585             : 
    7586             : static int
    7587          25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7588             :                  void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    7589             :                  struct spdk_memory_domain *domain, void *domain_ctx,
    7590             :                  struct spdk_accel_sequence *seq)
    7591             : {
    7592          25 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7593          25 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7594             :         int rc;
    7595             : 
    7596          25 :         SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    7597             :                       lba_count, lba);
    7598             : 
    7599          25 :         bio->iovs = iov;
    7600          25 :         bio->iovcnt = iovcnt;
    7601          25 :         bio->iovpos = 0;
    7602          25 :         bio->iov_offset = 0;
    7603             : 
    7604          25 :         if (domain != NULL || seq != NULL) {
    7605           0 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    7606           0 :                 bio->ext_opts.memory_domain = domain;
    7607           0 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    7608           0 :                 bio->ext_opts.io_flags = flags;
    7609           0 :                 bio->ext_opts.metadata = md;
    7610           0 :                 bio->ext_opts.accel_sequence = seq;
    7611             : 
    7612           0 :                 rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
    7613             :                                                  bdev_nvme_writev_done, bio,
    7614             :                                                  bdev_nvme_queued_reset_sgl,
    7615             :                                                  bdev_nvme_queued_next_sge,
    7616             :                                                  &bio->ext_opts);
    7617          25 :         } else if (iovcnt == 1) {
    7618          25 :                 rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
    7619             :                                                     md, lba, lba_count, bdev_nvme_writev_done,
    7620             :                                                     bio, flags, 0, 0);
    7621             :         } else {
    7622           0 :                 rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    7623             :                                                      bdev_nvme_writev_done, bio, flags,
    7624             :                                                      bdev_nvme_queued_reset_sgl,
    7625             :                                                      bdev_nvme_queued_next_sge, md, 0, 0);
    7626             :         }
    7627             : 
    7628          25 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    7629           0 :                 SPDK_ERRLOG("writev failed: rc = %d\n", rc);
    7630             :         }
    7631          25 :         return rc;
    7632             : }
    7633             : 
    7634             : static int
    7635           0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7636             :                        void *md, uint64_t lba_count, uint64_t zslba,
    7637             :                        uint32_t flags)
    7638             : {
    7639           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7640           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7641             :         int rc;
    7642             : 
    7643           0 :         SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
    7644             :                       lba_count, zslba);
    7645             : 
    7646           0 :         bio->iovs = iov;
    7647           0 :         bio->iovcnt = iovcnt;
    7648           0 :         bio->iovpos = 0;
    7649           0 :         bio->iov_offset = 0;
    7650             : 
    7651           0 :         if (iovcnt == 1) {
    7652           0 :                 rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
    7653             :                                                        lba_count,
    7654             :                                                        bdev_nvme_zone_appendv_done, bio,
    7655             :                                                        flags,
    7656             :                                                        0, 0);
    7657             :         } else {
    7658           0 :                 rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
    7659             :                                                         bdev_nvme_zone_appendv_done, bio, flags,
    7660             :                                                         bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    7661             :                                                         md, 0, 0);
    7662             :         }
    7663             : 
    7664           0 :         if (rc != 0 && rc != -ENOMEM) {
    7665           0 :                 SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
    7666             :         }
    7667           0 :         return rc;
    7668             : }
    7669             : 
    7670             : static int
    7671           1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7672             :                    void *md, uint64_t lba_count, uint64_t lba,
    7673             :                    uint32_t flags)
    7674             : {
    7675             :         int rc;
    7676             : 
    7677           1 :         SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    7678             :                       lba_count, lba);
    7679             : 
    7680           1 :         bio->iovs = iov;
    7681           1 :         bio->iovcnt = iovcnt;
    7682           1 :         bio->iovpos = 0;
    7683           1 :         bio->iov_offset = 0;
    7684             : 
    7685           1 :         rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
    7686           1 :                                                bio->io_path->qpair->qpair,
    7687             :                                                lba, lba_count,
    7688             :                                                bdev_nvme_comparev_done, bio, flags,
    7689             :                                                bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    7690             :                                                md, 0, 0);
    7691             : 
    7692           1 :         if (rc != 0 && rc != -ENOMEM) {
    7693           0 :                 SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
    7694             :         }
    7695           1 :         return rc;
    7696             : }
    7697             : 
    7698             : static int
    7699           2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
    7700             :                               struct iovec *write_iov, int write_iovcnt,
    7701             :                               void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
    7702             : {
    7703           2 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7704           2 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7705           2 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7706             :         int rc;
    7707             : 
    7708           2 :         SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    7709             :                       lba_count, lba);
    7710             : 
    7711           2 :         bio->iovs = cmp_iov;
    7712           2 :         bio->iovcnt = cmp_iovcnt;
    7713           2 :         bio->iovpos = 0;
    7714           2 :         bio->iov_offset = 0;
    7715           2 :         bio->fused_iovs = write_iov;
    7716           2 :         bio->fused_iovcnt = write_iovcnt;
    7717           2 :         bio->fused_iovpos = 0;
    7718           2 :         bio->fused_iov_offset = 0;
    7719             : 
    7720           2 :         if (bdev_io->num_retries == 0) {
    7721           2 :                 bio->first_fused_submitted = false;
    7722           2 :                 bio->first_fused_completed = false;
    7723             :         }
    7724             : 
    7725           2 :         if (!bio->first_fused_submitted) {
    7726           2 :                 flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    7727           2 :                 memset(&bio->cpl, 0, sizeof(bio->cpl));
    7728             : 
    7729           2 :                 rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
    7730             :                                                        bdev_nvme_comparev_and_writev_done, bio, flags,
    7731             :                                                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
    7732           2 :                 if (rc == 0) {
    7733           2 :                         bio->first_fused_submitted = true;
    7734           2 :                         flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    7735             :                 } else {
    7736           0 :                         if (rc != -ENOMEM) {
    7737           0 :                                 SPDK_ERRLOG("compare failed: rc = %d\n", rc);
    7738             :                         }
    7739           0 :                         return rc;
    7740             :                 }
    7741             :         }
    7742             : 
    7743           2 :         flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
    7744             : 
    7745           2 :         rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    7746             :                                              bdev_nvme_comparev_and_writev_done, bio, flags,
    7747             :                                              bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
    7748           2 :         if (rc != 0 && rc != -ENOMEM) {
    7749           0 :                 SPDK_ERRLOG("write failed: rc = %d\n", rc);
    7750           0 :                 rc = 0;
    7751             :         }
    7752             : 
    7753           2 :         return rc;
    7754             : }
    7755             : 
    7756             : static int
    7757           1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    7758             : {
    7759           1 :         struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
    7760             :         struct spdk_nvme_dsm_range *range;
    7761             :         uint64_t offset, remaining;
    7762             :         uint64_t num_ranges_u64;
    7763             :         uint16_t num_ranges;
    7764             :         int rc;
    7765             : 
    7766           1 :         num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
    7767             :                          SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    7768           1 :         if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
    7769           0 :                 SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
    7770           0 :                 return -EINVAL;
    7771             :         }
    7772           1 :         num_ranges = (uint16_t)num_ranges_u64;
    7773             : 
    7774           1 :         offset = offset_blocks;
    7775           1 :         remaining = num_blocks;
    7776           1 :         range = &dsm_ranges[0];
    7777             : 
    7778             :         /* Fill max-size ranges until the remaining blocks fit into one range */
    7779           1 :         while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
    7780           0 :                 range->attributes.raw = 0;
    7781           0 :                 range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    7782           0 :                 range->starting_lba = offset;
    7783             : 
    7784           0 :                 offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    7785           0 :                 remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    7786           0 :                 range++;
    7787             :         }
    7788             : 
    7789             :         /* Final range describes the remaining blocks */
    7790           1 :         range->attributes.raw = 0;
    7791           1 :         range->length = remaining;
    7792           1 :         range->starting_lba = offset;
    7793             : 
    7794           1 :         rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
    7795           1 :                         bio->io_path->qpair->qpair,
    7796             :                         SPDK_NVME_DSM_ATTR_DEALLOCATE,
    7797             :                         dsm_ranges, num_ranges,
    7798             :                         bdev_nvme_queued_done, bio);
    7799             : 
    7800           1 :         return rc;
    7801             : }
    7802             : 
    7803             : static int
    7804           0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    7805             : {
    7806           0 :         if (num_blocks > UINT16_MAX + 1) {
    7807           0 :                 SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
    7808           0 :                 return -EINVAL;
    7809             :         }
    7810             : 
    7811           0 :         return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
    7812           0 :                                              bio->io_path->qpair->qpair,
    7813             :                                              offset_blocks, num_blocks,
    7814             :                                              bdev_nvme_queued_done, bio,
    7815             :                                              0);
    7816             : }
    7817             : 
    7818             : static int
    7819           0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
    7820             :                         struct spdk_bdev_zone_info *info)
    7821             : {
    7822           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7823           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7824           0 :         uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    7825           0 :         uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    7826           0 :         uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
    7827             : 
    7828           0 :         if (zone_id % zone_size != 0) {
    7829           0 :                 return -EINVAL;
    7830             :         }
    7831             : 
    7832           0 :         if (num_zones > total_zones || !num_zones) {
    7833           0 :                 return -EINVAL;
    7834             :         }
    7835             : 
    7836           0 :         assert(!bio->zone_report_buf);
    7837           0 :         bio->zone_report_buf = calloc(1, zone_report_bufsize);
    7838           0 :         if (!bio->zone_report_buf) {
    7839           0 :                 return -ENOMEM;
    7840             :         }
    7841             : 
    7842           0 :         bio->handled_zones = 0;
    7843             : 
    7844           0 :         return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
    7845             :                                           zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
    7846             :                                           bdev_nvme_get_zone_info_done, bio);
    7847             : }
    7848             : 
    7849             : static int
    7850           0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
    7851             :                           enum spdk_bdev_zone_action action)
    7852             : {
    7853           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7854           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7855             : 
    7856           0 :         switch (action) {
    7857           0 :         case SPDK_BDEV_ZONE_CLOSE:
    7858           0 :                 return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
    7859             :                                                 bdev_nvme_zone_management_done, bio);
    7860           0 :         case SPDK_BDEV_ZONE_FINISH:
    7861           0 :                 return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
    7862             :                                                  bdev_nvme_zone_management_done, bio);
    7863           0 :         case SPDK_BDEV_ZONE_OPEN:
    7864           0 :                 return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
    7865             :                                                bdev_nvme_zone_management_done, bio);
    7866           0 :         case SPDK_BDEV_ZONE_RESET:
    7867           0 :                 return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
    7868             :                                                 bdev_nvme_zone_management_done, bio);
    7869           0 :         case SPDK_BDEV_ZONE_OFFLINE:
    7870           0 :                 return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
    7871             :                                                   bdev_nvme_zone_management_done, bio);
    7872           0 :         default:
    7873           0 :                 return -EINVAL;
    7874             :         }
    7875             : }
    7876             : 
    7877             : static void
    7878           5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    7879             :                          struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
    7880             : {
    7881             :         struct nvme_io_path *io_path;
    7882             :         struct nvme_ctrlr *nvme_ctrlr;
    7883             :         uint32_t max_xfer_size;
    7884           5 :         int rc = -ENXIO;
    7885             : 
    7886             :         /* Choose the first ctrlr which is not failed. */
    7887           8 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    7888           7 :                 nvme_ctrlr = io_path->qpair->ctrlr;
    7889             : 
    7890             :                 /* We should skip any unavailable nvme_ctrlr rather than checking
    7891             :                  * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
    7892             :                  */
    7893           7 :                 if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    7894           3 :                         continue;
    7895             :                 }
    7896             : 
    7897           4 :                 max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
    7898             : 
    7899           4 :                 if (nbytes > max_xfer_size) {
    7900           0 :                         SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    7901           0 :                         rc = -EINVAL;
    7902           0 :                         goto err;
    7903             :                 }
    7904             : 
    7905           4 :                 rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
    7906             :                                                    bdev_nvme_admin_passthru_done, bio);
    7907           4 :                 if (rc == 0) {
    7908           4 :                         return;
    7909             :                 }
    7910             :         }
    7911             : 
    7912           1 : err:
    7913           1 :         bdev_nvme_admin_complete(bio, rc);
    7914             : }
    7915             : 
    7916             : static int
    7917           0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    7918             :                       void *buf, size_t nbytes)
    7919             : {
    7920           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7921           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7922           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    7923           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    7924             : 
    7925           0 :         if (nbytes > max_xfer_size) {
    7926           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    7927           0 :                 return -EINVAL;
    7928             :         }
    7929             : 
    7930             :         /*
    7931             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    7932             :          * so fill it out automatically.
    7933             :          */
    7934           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    7935             : 
    7936           0 :         return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
    7937             :                                           (uint32_t)nbytes, bdev_nvme_queued_done, bio);
    7938             : }
    7939             : 
    7940             : static int
    7941           0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    7942             :                          void *buf, size_t nbytes, void *md_buf, size_t md_len)
    7943             : {
    7944           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7945           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7946           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    7947           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    7948           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    7949             : 
    7950           0 :         if (nbytes > max_xfer_size) {
    7951           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    7952           0 :                 return -EINVAL;
    7953             :         }
    7954             : 
    7955           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    7956           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    7957           0 :                 return -EINVAL;
    7958             :         }
    7959             : 
    7960             :         /*
    7961             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    7962             :          * so fill it out automatically.
    7963             :          */
    7964           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    7965             : 
    7966           0 :         return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
    7967             :                         (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
    7968             : }
    7969             : 
    7970             : static int
    7971           0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
    7972             :                           struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
    7973             :                           size_t nbytes, void *md_buf, size_t md_len)
    7974             : {
    7975           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7976           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7977           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    7978           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    7979           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    7980             : 
    7981           0 :         bio->iovs = iov;
    7982           0 :         bio->iovcnt = iovcnt;
    7983           0 :         bio->iovpos = 0;
    7984           0 :         bio->iov_offset = 0;
    7985             : 
    7986           0 :         if (nbytes > max_xfer_size) {
    7987           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    7988           0 :                 return -EINVAL;
    7989             :         }
    7990             : 
    7991           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    7992           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    7993           0 :                 return -EINVAL;
    7994             :         }
    7995             : 
    7996             :         /*
    7997             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
    7998             :          * require a nsid, so fill it out automatically.
    7999             :          */
    8000           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8001             : 
    8002           0 :         return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
    8003             :                        ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
    8004             :                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
    8005             : }
    8006             : 
    8007             : static void
    8008           6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    8009             :                 struct nvme_bdev_io *bio_to_abort)
    8010             : {
    8011             :         struct nvme_io_path *io_path;
    8012           6 :         int rc = 0;
    8013             : 
    8014           6 :         rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
    8015           6 :         if (rc == 0) {
    8016           1 :                 bdev_nvme_admin_complete(bio, 0);
    8017           1 :                 return;
    8018             :         }
    8019             : 
    8020           5 :         io_path = bio_to_abort->io_path;
    8021           5 :         if (io_path != NULL) {
    8022           3 :                 rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8023           3 :                                                    io_path->qpair->qpair,
    8024             :                                                    bio_to_abort,
    8025             :                                                    bdev_nvme_abort_done, bio);
    8026             :         } else {
    8027           3 :                 STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    8028           2 :                         rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8029             :                                                            NULL,
    8030             :                                                            bio_to_abort,
    8031             :                                                            bdev_nvme_abort_done, bio);
    8032             : 
    8033           2 :                         if (rc != -ENOENT) {
    8034           1 :                                 break;
    8035             :                         }
    8036             :                 }
    8037             :         }
    8038             : 
    8039           5 :         if (rc != 0) {
    8040             :                 /* If no command was found or there was any error, complete the abort
    8041             :                  * request with failure.
    8042             :                  */
    8043           2 :                 bdev_nvme_admin_complete(bio, rc);
    8044             :         }
    8045             : }
    8046             : 
    8047             : static int
    8048           0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
    8049             :                uint64_t num_blocks)
    8050             : {
    8051           0 :         struct spdk_nvme_scc_source_range range = {
    8052             :                 .slba = src_offset_blocks,
    8053           0 :                 .nlb = num_blocks - 1
    8054             :         };
    8055             : 
    8056           0 :         return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
    8057           0 :                                      bio->io_path->qpair->qpair,
    8058             :                                      &range, 1, dst_offset_blocks,
    8059             :                                      bdev_nvme_queued_done, bio);
    8060             : }
    8061             : 
    8062             : static void
    8063           0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
    8064             : {
    8065             :         const char      *action;
    8066             : 
    8067           0 :         if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
    8068           0 :                 action = "reset";
    8069           0 :         } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
    8070           0 :                 action = "abort";
    8071             :         } else {
    8072           0 :                 action = "none";
    8073             :         }
    8074             : 
    8075           0 :         spdk_json_write_object_begin(w);
    8076             : 
    8077           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
    8078             : 
    8079           0 :         spdk_json_write_named_object_begin(w, "params");
    8080           0 :         spdk_json_write_named_string(w, "action_on_timeout", action);
    8081           0 :         spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
    8082           0 :         spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
    8083           0 :         spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
    8084           0 :         spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
    8085           0 :         spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
    8086           0 :         spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
    8087           0 :         spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
    8088           0 :         spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
    8089           0 :         spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
    8090           0 :         spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
    8091           0 :         spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
    8092           0 :         spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
    8093           0 :         spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
    8094           0 :         spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
    8095           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
    8096           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
    8097           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
    8098           0 :         spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
    8099           0 :         spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
    8100           0 :         spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
    8101           0 :         spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
    8102           0 :         spdk_json_write_object_end(w);
    8103             : 
    8104           0 :         spdk_json_write_object_end(w);
    8105           0 : }
    8106             : 
    8107             : static void
    8108           0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
    8109             : {
    8110           0 :         struct spdk_nvme_transport_id trid;
    8111             : 
    8112           0 :         spdk_json_write_object_begin(w);
    8113             : 
    8114           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
    8115             : 
    8116           0 :         spdk_json_write_named_object_begin(w, "params");
    8117           0 :         spdk_json_write_named_string(w, "name", ctx->name);
    8118           0 :         spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
    8119             : 
    8120           0 :         trid = ctx->trid;
    8121           0 :         memset(trid.subnqn, 0, sizeof(trid.subnqn));
    8122           0 :         nvme_bdev_dump_trid_json(&trid, w);
    8123             : 
    8124           0 :         spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
    8125           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
    8126           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
    8127           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8128             :                                      ctx->bdev_opts.fast_io_fail_timeout_sec);
    8129           0 :         spdk_json_write_object_end(w);
    8130             : 
    8131           0 :         spdk_json_write_object_end(w);
    8132           0 : }
    8133             : 
    8134             : #ifdef SPDK_CONFIG_NVME_CUSE
    8135             : static void
    8136           0 : nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
    8137             :                             struct nvme_ctrlr *nvme_ctrlr)
    8138           0 : {
    8139           0 :         size_t cuse_name_size = 128;
    8140           0 :         char cuse_name[cuse_name_size];
    8141             : 
    8142           0 :         if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
    8143             :                                           cuse_name, &cuse_name_size) != 0) {
    8144           0 :                 return;
    8145             :         }
    8146             : 
    8147           0 :         spdk_json_write_object_begin(w);
    8148             : 
    8149           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");
    8150             : 
    8151           0 :         spdk_json_write_named_object_begin(w, "params");
    8152           0 :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8153           0 :         spdk_json_write_object_end(w);
    8154             : 
    8155           0 :         spdk_json_write_object_end(w);
    8156             : }
    8157             : #endif
    8158             : 
    8159             : static void
    8160           0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
    8161             :                        struct nvme_ctrlr *nvme_ctrlr)
    8162             : {
    8163             :         struct spdk_nvme_transport_id   *trid;
    8164             :         const struct spdk_nvme_ctrlr_opts *opts;
    8165             : 
    8166           0 :         if (nvme_ctrlr->opts.from_discovery_service) {
    8167             :                 /* Do not emit an RPC for this - it will be implicitly
    8168             :                  * covered by a separate bdev_nvme_start_discovery or
    8169             :                  * bdev_nvme_start_mdns_discovery RPC.
    8170             :                  */
    8171           0 :                 return;
    8172             :         }
    8173             : 
    8174           0 :         trid = &nvme_ctrlr->active_path_id->trid;
    8175             : 
    8176           0 :         spdk_json_write_object_begin(w);
    8177             : 
    8178           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
    8179             : 
    8180           0 :         spdk_json_write_named_object_begin(w, "params");
    8181           0 :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8182           0 :         nvme_bdev_dump_trid_json(trid, w);
    8183           0 :         spdk_json_write_named_bool(w, "prchk_reftag",
    8184           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
    8185           0 :         spdk_json_write_named_bool(w, "prchk_guard",
    8186           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
    8187           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
    8188           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
    8189           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8190             :                                      nvme_ctrlr->opts.fast_io_fail_timeout_sec);
    8191           0 :         if (nvme_ctrlr->opts.psk_path[0] != '\0') {
    8192           0 :                 spdk_json_write_named_string(w, "psk", nvme_ctrlr->opts.psk_path);
    8193             :         }
    8194             : 
    8195           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    8196           0 :         spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
    8197           0 :         spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
    8198           0 :         spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
    8199             : 
    8200           0 :         spdk_json_write_object_end(w);
    8201             : 
    8202           0 :         spdk_json_write_object_end(w);
    8203             : }
    8204             : 
    8205             : static void
    8206           0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
    8207             : {
    8208           0 :         spdk_json_write_object_begin(w);
    8209           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
    8210             : 
    8211           0 :         spdk_json_write_named_object_begin(w, "params");
    8212           0 :         spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
    8213           0 :         spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
    8214           0 :         spdk_json_write_object_end(w);
    8215             : 
    8216           0 :         spdk_json_write_object_end(w);
    8217           0 : }
    8218             : 
    /*
     * Write the configuration of this module to a JSON write context.
     *
     * Entries are emitted in this order:
     *   1. module options (bdev_nvme_opts_config_json), written before the
     *      mutex is taken;
     *   2. one entry per nvme_ctrlr of every nvme_bdev_ctrlr in
     *      g_nvme_bdev_ctrlrs, plus a CUSE entry per controller when
     *      SPDK_CONFIG_NVME_CUSE is defined;
     *   3. every discovery context in g_discovery_ctxs whose
     *      from_mdns_discovery_service flag is not set;
     *   4. the mDNS discovery entries (bdev_nvme_mdns_discovery_config_json);
     *   5. the hotplug entry.
     *
     * Steps 2-5 run with g_bdev_nvme_mutex held.
     *
     * w - JSON write context that receives the entries.
     * Returns 0 on every path.
     */
    8219             : static int
    8220           0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
    8221             : {
    8222             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    8223             :         struct nvme_ctrlr       *nvme_ctrlr;
    8224             :         struct discovery_ctx    *ctx;
    8225             : 
    8226           0 :         bdev_nvme_opts_config_json(w);
    8227             : 
    8228           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    8229             : 
    8230           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    8231           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    8232           0 :                         nvme_ctrlr_config_json(w, nvme_ctrlr);
    8233             : 
    8234             : #ifdef SPDK_CONFIG_NVME_CUSE
    8235           0 :                         nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
    8236             : #endif
    8237             :                 }
    8238             :         }
    8239             : 
                               /* Contexts flagged as coming from the mDNS discovery service are
                                * skipped here; bdev_nvme_mdns_discovery_config_json() is called
                                * right after this loop.
                                */
    8240           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    8241           0 :                 if (!ctx->from_mdns_discovery_service) {
    8242           0 :                         bdev_nvme_discovery_config_json(w, ctx);
    8243             :                 }
    8244             :         }
    8245             : 
    8246           0 :         bdev_nvme_mdns_discovery_config_json(w);
    8247             : 
    8248             :         /* Emit the hotplug entry last so that all NVMe bdevs have a chance to be
    8249             :          * constructed before the hotplug poller is enabled.
    8250             :          */
    8251           0 :         bdev_nvme_hotplug_config_json(w);
    8252             : 
    8253           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    8254           0 :         return 0;
    8255             : }
    8256             : 
    /*
     * Return the spdk_nvme_ctrlr reached through the first namespace of an
     * NVMe bdev.
     *
     * bdev - bdev to look up; may be NULL.
     *
     * Returns NULL when bdev is NULL or when bdev->module is not &nvme_if.
     * Otherwise returns ctrlr->ctrlr of the first entry in the enclosing
     * nvme_bdev's nvme_ns_list.
     *
     * NOTE(review): the only guard against an empty nvme_ns_list is the
     * assert() below. If assertions are compiled out, an empty list would be
     * dereferenced as NULL. Confirm the list can never be empty here.
     */
    8257             : struct spdk_nvme_ctrlr *
    8258           1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
    8259             : {
    8260             :         struct nvme_bdev *nbdev;
    8261             :         struct nvme_ns *nvme_ns;
    8262             : 
    8263           1 :         if (!bdev || bdev->module != &nvme_if) {
    8264           0 :                 return NULL;
    8265             :         }
    8266             : 
                               /* bdev is the "disk" member embedded in struct nvme_bdev; recover
                                * the enclosing structure from it.
                                */
    8267           1 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    8268           1 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    8269           1 :         assert(nvme_ns != NULL);
    8270             : 
    8271           1 :         return nvme_ns->ctrlr->ctrlr;
    8272             : }
    8273             : 
    /*
     * Write one JSON object describing an I/O path.
     *
     * Fields written, in order:
     *   "bdev_name"  - name of the bdev attached to the path's namespace
     *   "cntlid"     - cntlid from the controller data
     *   "current"    - true only when the path has a bdev channel and is that
     *                  channel's current_io_path
     *   "connected"  - result of nvme_qpair_is_connected() for the path's qpair
     *   "accessible" - result of nvme_ns_is_accessible() for the namespace
     *   "transport"  - sub-object with "trtype" and "traddr", plus "trsvcid"
     *                  when it is non-empty and "adrfam" when a string for it
     *                  is available
     *
     * w       - JSON write context that receives the object.
     * io_path - path to describe; its nvme_ns, nvme_ns->bdev and qpair are
     *           dereferenced without NULL checks.
     */
    8274             : void
    8275           0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
    8276             : {
    8277           0 :         struct nvme_ns *nvme_ns = io_path->nvme_ns;
    8278           0 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    8279             :         const struct spdk_nvme_ctrlr_data *cdata;
    8280             :         const struct spdk_nvme_transport_id *trid;
    8281             :         const char *adrfam_str;
    8282             : 
    8283           0 :         spdk_json_write_object_begin(w);
    8284             : 
    8285           0 :         spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
    8286             : 
    8287           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    8288           0 :         trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
    8289             : 
    8290           0 :         spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
                               /* The NULL test comes first so that nbdev_ch is not dereferenced
                                * when the path has no channel.
                                */
    8291           0 :         spdk_json_write_named_bool(w, "current", io_path->nbdev_ch != NULL &&
    8292           0 :                                    io_path == io_path->nbdev_ch->current_io_path);
    8293           0 :         spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
    8294           0 :         spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
    8295             : 
    8296           0 :         spdk_json_write_named_object_begin(w, "transport");
    8297           0 :         spdk_json_write_named_string(w, "trtype", trid->trstring);
    8298           0 :         spdk_json_write_named_string(w, "traddr", trid->traddr);
                               /* "trsvcid" and "adrfam" are optional and omitted when unset. */
    8299           0 :         if (trid->trsvcid[0] != '\0') {
    8300           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
    8301             :         }
    8302           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
    8303           0 :         if (adrfam_str) {
    8304           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
    8305             :         }
    8306           0 :         spdk_json_write_object_end(w);
    8307             : 
    8308           0 :         spdk_json_write_object_end(w);
    8309           0 : }
    8310             : 
    /*
     * Write a JSON array with one object per discovery context in
     * g_discovery_ctxs.
     *
     * Each object contains:
     *   "name"      - the context's name
     *   "trid"      - object filled in by nvme_bdev_dump_trid_json() from the
     *                 context's transport ID
     *   "referrals" - array with one object per entry in the context's
     *                 discovery_entry_ctxs list, each holding a "trid" object
     *                 for that entry
     *
     * w - JSON write context that receives the array.
     *
     * NOTE(review): g_discovery_ctxs is walked here without taking
     * g_bdev_nvme_mutex, whereas bdev_nvme_config_json() walks the same list
     * with the mutex held. Confirm that callers serialize access some other
     * way.
     */
    8311             : void
    8312           0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
    8313             : {
    8314             :         struct discovery_ctx *ctx;
    8315             :         struct discovery_entry_ctx *entry_ctx;
    8316             : 
    8317           0 :         spdk_json_write_array_begin(w);
    8318           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    8319           0 :                 spdk_json_write_object_begin(w);
    8320           0 :                 spdk_json_write_named_string(w, "name", ctx->name);
    8321             : 
    8322           0 :                 spdk_json_write_named_object_begin(w, "trid");
    8323           0 :                 nvme_bdev_dump_trid_json(&ctx->trid, w);
    8324           0 :                 spdk_json_write_object_end(w);
    8325             : 
    8326           0 :                 spdk_json_write_named_array_begin(w, "referrals");
    8327           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    8328           0 :                         spdk_json_write_object_begin(w);
    8329           0 :                         spdk_json_write_named_object_begin(w, "trid");
    8330           0 :                         nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
    8331           0 :                         spdk_json_write_object_end(w);
    8332           0 :                         spdk_json_write_object_end(w);
    8333             :                 }
    8334           0 :                 spdk_json_write_array_end(w);
    8335             : 
    8336           0 :                 spdk_json_write_object_end(w);
    8337             :         }
    8338           0 :         spdk_json_write_array_end(w);
    8339           0 : }
    8340             : 
    8341           1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme) /* presumably registers the "bdev_nvme" log component declared via spdk/log.h - TODO confirm */
    8342             : 
    /*
     * Trace registration for the "bdev_nvme" trace group
     * (TRACE_GROUP_BDEV_NVME).
     *
     * Defines two tracepoints, BDEV_NVME_IO_START and BDEV_NVME_IO_DONE. Both
     * are attached to OBJECT_BDEV_NVME_IO with OWNER_NONE and carry a single
     * pointer argument named "ctx" of size 8.
     *
     * The body then:
     *   - registers OBJECT_BDEV_NVME_IO with the character 'N';
     *   - registers the two tracepoint descriptions;
     *   - relates the NVMe PCIe/TCP submit and complete tracepoints to
     *     OBJECT_BDEV_NVME_IO.
     *
     * NOTE(review): the 1 / 0 value that differs between the START and DONE
     * entries is presumably the "new object" flag of
     * struct spdk_trace_tpoint_opts. Confirm against spdk/trace.h.
     */
    8343           1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)
    8344             : {
    8345           0 :         struct spdk_trace_tpoint_opts opts[] = {
    8346             :                 {
    8347             :                         "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
    8348             :                         OWNER_NONE, OBJECT_BDEV_NVME_IO, 1,
    8349             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    8350             :                 },
    8351             :                 {
    8352             :                         "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
    8353             :                         OWNER_NONE, OBJECT_BDEV_NVME_IO, 0,
    8354             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    8355             :                 }
    8356             :         };
    8357             : 
    8358             : 
    8359           0 :         spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
    8360           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
                               /* Relate the lower-level NVMe transport tracepoints to the
                                * bdev_nvme I/O object registered above.
                                */
    8361           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    8362           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    8363           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    8364           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    8365           0 : }

Generated by: LCOV version 1.15