Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation. All rights reserved.
3 : * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4 : * Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : * Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
6 : */
7 :
8 : #include "spdk/stdinc.h"
9 :
10 : #include "bdev_nvme.h"
11 :
12 : #include "spdk/accel.h"
13 : #include "spdk/config.h"
14 : #include "spdk/endian.h"
15 : #include "spdk/bdev.h"
16 : #include "spdk/json.h"
17 : #include "spdk/keyring.h"
18 : #include "spdk/likely.h"
19 : #include "spdk/nvme.h"
20 : #include "spdk/nvme_ocssd.h"
21 : #include "spdk/nvme_zns.h"
22 : #include "spdk/opal.h"
23 : #include "spdk/thread.h"
24 : #include "spdk/trace.h"
25 : #include "spdk/string.h"
26 : #include "spdk/util.h"
27 : #include "spdk/uuid.h"
28 :
29 : #include "spdk/bdev_module.h"
30 : #include "spdk/log.h"
31 :
32 : #include "spdk_internal/usdt.h"
33 : #include "spdk_internal/trace_defs.h"
34 :
35 : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
36 : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
37 :
38 : #define NSID_STR_LEN 10
39 :
40 : #define SPDK_CONTROLLER_NAME_MAX 512
41 :
42 : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
43 :
/* Per-I/O driver context. Lives in the driver_ctx area of each spdk_bdev_io
 * submitted to this module (see bdev_nvme_get_ctx_size()).
 */
struct nvme_bdev_io {
	/** array of iovecs to transfer. */
	struct iovec *iovs;

	/** Number of iovecs in iovs array. */
	int iovcnt;

	/** Current iovec position. */
	int iovpos;

	/** Offset in current iovec. */
	uint32_t iov_offset;

	/** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
	 * being reset in a reset I/O.
	 */
	struct nvme_io_path *io_path;

	/** array of iovecs for the second (write) half of a fused compare-and-write. */
	struct iovec *fused_iovs;

	/** Number of iovecs in fused_iovs array. */
	int fused_iovcnt;

	/** Current iovec position in fused_iovs. */
	int fused_iovpos;

	/** Offset in current fused iovec. */
	uint32_t fused_iov_offset;

	/** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
	struct spdk_nvme_cpl cpl;

	/** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
	struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;

	/** Keeps track if first of fused commands was submitted */
	bool first_fused_submitted;

	/** Keeps track if first of fused commands was completed */
	bool first_fused_completed;

	/** Temporary pointer to zone report buffer */
	struct spdk_nvme_zns_zone_report *zone_report_buf;

	/** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
	uint64_t handled_zones;

	/** Expiration value in ticks to retry the current I/O. */
	uint64_t retry_ticks;

	/* How many times the current I/O was retried. */
	int32_t retry_count;

	/* Current tsc at submit time. */
	uint64_t submit_tsc;
};
101 :
/* One transport ID that the hotplug monitor must not re-attach. */
struct nvme_probe_skip_entry {
	struct spdk_nvme_transport_id trid;
	TAILQ_ENTRY(nvme_probe_skip_entry) tailq;
};
/* All the controllers deleted by users via RPC are skipped by hotplug monitor */
static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
			g_skipped_nvme_ctrlrs);
109 :
110 : #define BDEV_NVME_DEFAULT_DIGESTS (SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256) | \
111 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA384) | \
112 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512))
113 :
114 : #define BDEV_NVME_DEFAULT_DHGROUPS (SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL) | \
115 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_2048) | \
116 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_3072) | \
117 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_4096) | \
118 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_6144) | \
119 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192))
120 :
/* Module-wide default options applied to newly attached controllers.
 * Mutable at runtime via the bdev_nvme_set_options RPC.
 */
static struct spdk_bdev_nvme_opts g_opts = {
	.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
	.timeout_us = 0,
	.timeout_admin_us = 0,
	.keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
	.transport_retry_count = 4,
	.arbitration_burst = 0,
	.low_priority_weight = 0,
	.medium_priority_weight = 0,
	.high_priority_weight = 0,
	.nvme_adminq_poll_period_us = 10000ULL,
	.nvme_ioq_poll_period_us = 0,
	.io_queue_requests = 0,
	.delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
	.bdev_retry_count = 3,
	.transport_ack_timeout = 0,
	.ctrlr_loss_timeout_sec = 0,
	.reconnect_delay_sec = 0,
	.fast_io_fail_timeout_sec = 0,
	.disable_auto_failback = false,
	.generate_uuids = false,
	.transport_tos = 0,
	.nvme_error_stat = false,
	.io_path_stat = false,
	.allow_accel_sequence = false,
	.dhchap_digests = BDEV_NVME_DEFAULT_DIGESTS,
	.dhchap_dhgroups = BDEV_NVME_DEFAULT_DHGROUPS,
};
149 :
150 : #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
151 : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL
152 :
153 : static int g_hot_insert_nvme_controller_index = 0;
154 : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
155 : static bool g_nvme_hotplug_enabled = false;
156 : struct spdk_thread *g_bdev_nvme_init_thread;
157 : static struct spdk_poller *g_hotplug_poller;
158 : static struct spdk_poller *g_hotplug_probe_poller;
159 : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
160 :
161 : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
162 : struct nvme_async_probe_ctx *ctx);
163 : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
164 : struct nvme_async_probe_ctx *ctx);
165 : static int bdev_nvme_library_init(void);
166 : static void bdev_nvme_library_fini(void);
167 : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
168 : struct spdk_bdev_io *bdev_io);
169 : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
170 : struct spdk_bdev_io *bdev_io);
171 : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
172 : void *md, uint64_t lba_count, uint64_t lba,
173 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
174 : struct spdk_accel_sequence *seq);
175 : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
176 : void *md, uint64_t lba_count, uint64_t lba);
177 : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
178 : void *md, uint64_t lba_count, uint64_t lba,
179 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
180 : struct spdk_accel_sequence *seq,
181 : union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13);
182 : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
183 : void *md, uint64_t lba_count,
184 : uint64_t zslba, uint32_t flags);
185 : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
186 : void *md, uint64_t lba_count, uint64_t lba,
187 : uint32_t flags);
188 : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
189 : struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
190 : int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
191 : uint32_t flags);
192 : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
193 : uint32_t num_zones, struct spdk_bdev_zone_info *info);
194 : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
195 : enum spdk_bdev_zone_action action);
196 : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
197 : struct nvme_bdev_io *bio,
198 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
199 : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
200 : void *buf, size_t nbytes);
201 : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
202 : void *buf, size_t nbytes, void *md_buf, size_t md_len);
203 : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
204 : struct iovec *iov, int iovcnt, size_t nbytes,
205 : void *md_buf, size_t md_len);
206 : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
207 : struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
208 : static void bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio);
209 : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
210 : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
211 : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
212 : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
213 :
214 : static struct nvme_ns *nvme_ns_alloc(void);
215 : static void nvme_ns_free(struct nvme_ns *ns);
216 :
217 : static int
218 173 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
219 : {
220 173 : return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
221 : }
222 :
223 895 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
224 :
225 : struct spdk_nvme_qpair *
226 1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
227 : {
228 : struct nvme_ctrlr_channel *ctrlr_ch;
229 :
230 1 : assert(ctrlr_io_ch != NULL);
231 :
232 1 : ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
233 :
234 1 : return ctrlr_ch->qpair->qpair;
235 : }
236 :
237 : static int
238 0 : bdev_nvme_get_ctx_size(void)
239 : {
240 0 : return sizeof(struct nvme_bdev_io);
241 : }
242 :
/* Bdev module descriptor: registers init/fini hooks, JSON config dump, and
 * the per-I/O context size with the generic bdev layer.
 */
static struct spdk_bdev_module nvme_if = {
	.name = "nvme",
	.async_fini = true,
	.module_init = bdev_nvme_library_init,
	.module_fini = bdev_nvme_library_fini,
	.config_json = bdev_nvme_config_json,
	.get_ctx_size = bdev_nvme_get_ctx_size,

};
SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
253 :
254 : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
255 : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
256 : bool g_bdev_nvme_module_finish;
257 :
258 : struct nvme_bdev_ctrlr *
259 270 : nvme_bdev_ctrlr_get_by_name(const char *name)
260 : {
261 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
262 :
263 270 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
264 148 : if (strcmp(name, nbdev_ctrlr->name) == 0) {
265 148 : break;
266 : }
267 : }
268 :
269 270 : return nbdev_ctrlr;
270 : }
271 :
272 : static struct nvme_ctrlr *
273 58 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
274 : const struct spdk_nvme_transport_id *trid, const char *hostnqn)
275 : {
276 : const struct spdk_nvme_ctrlr_opts *opts;
277 : struct nvme_ctrlr *nvme_ctrlr;
278 :
279 99 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
280 74 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
281 74 : if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0 &&
282 33 : strcmp(hostnqn, opts->hostnqn) == 0) {
283 33 : break;
284 : }
285 : }
286 :
287 58 : return nvme_ctrlr;
288 : }
289 :
290 : struct nvme_ctrlr *
291 0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
292 : uint16_t cntlid)
293 : {
294 : struct nvme_ctrlr *nvme_ctrlr;
295 : const struct spdk_nvme_ctrlr_data *cdata;
296 :
297 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
298 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
299 0 : if (cdata->cntlid == cntlid) {
300 0 : break;
301 : }
302 : }
303 :
304 0 : return nvme_ctrlr;
305 : }
306 :
307 : static struct nvme_bdev *
308 72 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
309 : {
310 : struct nvme_bdev *bdev;
311 :
312 72 : pthread_mutex_lock(&g_bdev_nvme_mutex);
313 106 : TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
314 68 : if (bdev->nsid == nsid) {
315 34 : break;
316 : }
317 : }
318 72 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
319 :
320 72 : return bdev;
321 : }
322 :
323 : struct nvme_ns *
324 139 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
325 : {
326 139 : struct nvme_ns ns;
327 :
328 139 : assert(nsid > 0);
329 :
330 139 : ns.id = nsid;
331 139 : return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
332 : }
333 :
/* Return the active namespace with the smallest ID, or NULL if none. */
struct nvme_ns *
nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
{
	return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
}
339 :
340 : struct nvme_ns *
341 63 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
342 : {
343 63 : if (ns == NULL) {
344 0 : return NULL;
345 : }
346 :
347 63 : return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
348 : }
349 :
350 : static struct nvme_ctrlr *
351 51 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
352 : {
353 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
354 51 : struct nvme_ctrlr *nvme_ctrlr = NULL;
355 :
356 51 : pthread_mutex_lock(&g_bdev_nvme_mutex);
357 70 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
358 19 : nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid, hostnqn);
359 19 : if (nvme_ctrlr != NULL) {
360 0 : break;
361 : }
362 : }
363 51 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
364 :
365 51 : return nvme_ctrlr;
366 : }
367 :
368 : struct nvme_ctrlr *
369 71 : nvme_ctrlr_get_by_name(const char *name)
370 : {
371 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
372 71 : struct nvme_ctrlr *nvme_ctrlr = NULL;
373 :
374 71 : if (name == NULL) {
375 0 : return NULL;
376 : }
377 :
378 71 : pthread_mutex_lock(&g_bdev_nvme_mutex);
379 71 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
380 71 : if (nbdev_ctrlr != NULL) {
381 40 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
382 : }
383 71 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
384 :
385 71 : return nvme_ctrlr;
386 : }
387 :
/* Invoke fn(nbdev_ctrlr, ctx) on every registered bdev controller while
 * holding g_bdev_nvme_mutex; fn must therefore not re-acquire that mutex.
 */
void
nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
{
	struct nvme_bdev_ctrlr *nbdev_ctrlr;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
		fn(nbdev_ctrlr, ctx);
	}
	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}
399 :
400 : void
401 0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
402 : {
403 : const char *trtype_str;
404 : const char *adrfam_str;
405 :
406 0 : trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
407 0 : if (trtype_str) {
408 0 : spdk_json_write_named_string(w, "trtype", trtype_str);
409 : }
410 :
411 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
412 0 : if (adrfam_str) {
413 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
414 : }
415 :
416 0 : if (trid->traddr[0] != '\0') {
417 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
418 : }
419 :
420 0 : if (trid->trsvcid[0] != '\0') {
421 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
422 : }
423 :
424 0 : if (trid->subnqn[0] != '\0') {
425 0 : spdk_json_write_named_string(w, "subnqn", trid->subnqn);
426 : }
427 0 : }
428 :
/* Unlink nvme_ctrlr from its parent nbdev_ctrlr; free the parent when this
 * was its last child. Takes g_bdev_nvme_mutex internally.
 */
static void
nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
		       struct nvme_ctrlr *nvme_ctrlr)
{
	SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
	pthread_mutex_lock(&g_bdev_nvme_mutex);

	TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
	if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
		/* Other paths still reference the parent; keep it alive. */
		pthread_mutex_unlock(&g_bdev_nvme_mutex);

		return;
	}
	TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);

	pthread_mutex_unlock(&g_bdev_nvme_mutex);

	/* All namespaces must have been torn down before the last path goes. */
	assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));

	free(nbdev_ctrlr->name);
	free(nbdev_ctrlr);
}
451 :
/* Final teardown of an nvme_ctrlr after detach completed: release ANA state,
 * OPAL device, namespace tree, path IDs, keys, and the structure itself.
 * If this was the last controller during module shutdown, finish module fini.
 */
static void
_nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
{
	struct nvme_path_id *path_id, *tmp_path;
	struct nvme_ns *ns, *tmp_ns;

	free(nvme_ctrlr->copied_ana_desc);
	spdk_free(nvme_ctrlr->ana_log_page);

	if (nvme_ctrlr->opal_dev) {
		spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
		nvme_ctrlr->opal_dev = NULL;
	}

	/* Detach from the parent before freeing per-controller state. */
	if (nvme_ctrlr->nbdev_ctrlr) {
		nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
	}

	RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
		RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
		nvme_ns_free(ns);
	}

	TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
		TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
		free(path_id);
	}

	pthread_mutex_destroy(&nvme_ctrlr->mutex);
	spdk_keyring_put_key(nvme_ctrlr->psk);
	spdk_keyring_put_key(nvme_ctrlr->dhchap_key);
	spdk_keyring_put_key(nvme_ctrlr->dhchap_ctrlr_key);
	free(nvme_ctrlr);

	/* During module fini, the last controller to go completes shutdown. */
	pthread_mutex_lock(&g_bdev_nvme_mutex);
	if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
		spdk_bdev_module_fini_done();
		return;
	}
	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}
495 :
496 : static int
497 60 : nvme_detach_poller(void *arg)
498 : {
499 60 : struct nvme_ctrlr *nvme_ctrlr = arg;
500 : int rc;
501 :
502 60 : rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
503 60 : if (rc != -EAGAIN) {
504 60 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
505 60 : _nvme_ctrlr_delete(nvme_ctrlr);
506 : }
507 :
508 60 : return SPDK_POLLER_BUSY;
509 : }
510 :
/* Begin deleting an nvme_ctrlr: stop its pollers and kick off an async
 * detach driven by nvme_detach_poller(). On setup failure, fall back to
 * freeing the controller without detaching the underlying device.
 */
static void
nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
{
	int rc;

	spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);

	/* First, unregister the adminq poller, as the driver will poll adminq if necessary */
	spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);

	/* If we got here, the reset/detach poller cannot be active */
	assert(nvme_ctrlr->reset_detach_poller == NULL);
	nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
					  nvme_ctrlr, 1000);
	if (nvme_ctrlr->reset_detach_poller == NULL) {
		SPDK_ERRLOG("Failed to register detach poller\n");
		goto error;
	}

	rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to detach the NVMe controller\n");
		goto error;
	}

	return;
error:
	/* We don't have a good way to handle errors here, so just do what we can and delete the
	 * controller without detaching the underlying NVMe device.
	 */
	spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
	_nvme_ctrlr_delete(nvme_ctrlr);
}
544 :
/* io_device unregister completion: proceed with controller deletion. */
static void
nvme_ctrlr_unregister_cb(void *io_device)
{
	nvme_ctrlr_delete((struct nvme_ctrlr *)io_device);
}
552 :
553 : static void
554 59 : nvme_ctrlr_unregister(void *ctx)
555 : {
556 59 : struct nvme_ctrlr *nvme_ctrlr = ctx;
557 :
558 59 : spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
559 59 : }
560 :
561 : static bool
562 220 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
563 : {
564 220 : if (!nvme_ctrlr->destruct) {
565 105 : return false;
566 : }
567 :
568 115 : if (nvme_ctrlr->ref > 0) {
569 56 : return false;
570 : }
571 :
572 59 : if (nvme_ctrlr->resetting) {
573 0 : return false;
574 : }
575 :
576 59 : if (nvme_ctrlr->ana_log_page_updating) {
577 0 : return false;
578 : }
579 :
580 59 : if (nvme_ctrlr->io_path_cache_clearing) {
581 0 : return false;
582 : }
583 :
584 59 : return true;
585 : }
586 :
/* Drop one reference on the controller. When the last reference is released
 * and destruction is pending, schedule unregistration on the controller's
 * owning thread.
 */
static void
nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
{
	pthread_mutex_lock(&nvme_ctrlr->mutex);
	SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);

	assert(nvme_ctrlr->ref > 0);
	nvme_ctrlr->ref--;

	if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return;
	}

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* Unregistration must run on the thread that owns the io_device. */
	spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
}
605 :
/* Invalidate the channel's cached I/O path and restart round-robin counting;
 * the next submission will re-select a path.
 */
static void
bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
{
	nbdev_ch->current_io_path = NULL;
	nbdev_ch->rr_counter = 0;
}
612 :
613 : static struct nvme_io_path *
614 8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
615 : {
616 : struct nvme_io_path *io_path;
617 :
618 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
619 15 : if (io_path->nvme_ns == nvme_ns) {
620 7 : break;
621 : }
622 : }
623 :
624 8 : return io_path;
625 : }
626 :
627 : static struct nvme_io_path *
628 35 : nvme_io_path_alloc(void)
629 : {
630 : struct nvme_io_path *io_path;
631 :
632 35 : io_path = calloc(1, sizeof(*io_path));
633 35 : if (io_path == NULL) {
634 0 : SPDK_ERRLOG("Failed to alloc io_path.\n");
635 0 : return NULL;
636 : }
637 :
638 35 : if (g_opts.io_path_stat) {
639 0 : io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
640 0 : if (io_path->stat == NULL) {
641 0 : free(io_path);
642 0 : SPDK_ERRLOG("Failed to alloc io_path stat.\n");
643 0 : return NULL;
644 : }
645 0 : spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
646 : }
647 :
648 35 : return io_path;
649 : }
650 :
/* Release an io_path and its optional statistics buffer (stat may be NULL). */
static void
nvme_io_path_free(struct nvme_io_path *io_path)
{
	free(io_path->stat);
	free(io_path);
}
657 :
/* Create an io_path linking this bdev channel to the given namespace: take a
 * reference on the controller's io_channel, hook the path into both the
 * qpair's and the channel's path lists, and invalidate the cached path.
 * Returns 0 on success or -ENOMEM.
 */
static int
_bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
{
	struct nvme_io_path *io_path;
	struct spdk_io_channel *ch;
	struct nvme_ctrlr_channel *ctrlr_ch;
	struct nvme_qpair *nvme_qpair;

	io_path = nvme_io_path_alloc();
	if (io_path == NULL) {
		return -ENOMEM;
	}

	io_path->nvme_ns = nvme_ns;

	/* Holds a reference on the controller channel until the path is deleted. */
	ch = spdk_get_io_channel(nvme_ns->ctrlr);
	if (ch == NULL) {
		nvme_io_path_free(io_path);
		SPDK_ERRLOG("Failed to alloc io_channel.\n");
		return -ENOMEM;
	}

	ctrlr_ch = spdk_io_channel_get_ctx(ch);

	nvme_qpair = ctrlr_ch->qpair;
	assert(nvme_qpair != NULL);

	io_path->qpair = nvme_qpair;
	TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);

	io_path->nbdev_ch = nbdev_ch;
	STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);

	/* Force re-selection so the new path can be considered. */
	bdev_nvme_clear_current_io_path(nbdev_ch);

	return 0;
}
695 :
696 : static void
697 35 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
698 : struct nvme_io_path *io_path)
699 : {
700 : struct spdk_bdev_io *bdev_io;
701 : struct nvme_bdev_io *bio;
702 :
703 36 : TAILQ_FOREACH(bdev_io, &nbdev_ch->retry_io_list, module_link) {
704 1 : bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
705 1 : if (bio->io_path == io_path) {
706 1 : bio->io_path = NULL;
707 : }
708 : }
709 35 : }
710 :
/* Detach an io_path from its bdev channel: fold its statistics into the
 * namespace, strip it from retry I/Os and the channel list, and drop the
 * controller channel reference. The io_path memory itself is intentionally
 * NOT freed here (see trailing comment).
 */
static void
_bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
{
	struct spdk_io_channel *ch;
	struct nvme_qpair *nvme_qpair;
	struct nvme_ctrlr_channel *ctrlr_ch;
	struct nvme_bdev *nbdev;

	nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));

	/* Add the statistics to nvme_ns before this path is destroyed. */
	pthread_mutex_lock(&nbdev->mutex);
	if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
		spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
	}
	pthread_mutex_unlock(&nbdev->mutex);

	bdev_nvme_clear_current_io_path(nbdev_ch);
	bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);

	STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
	io_path->nbdev_ch = NULL;

	nvme_qpair = io_path->qpair;
	assert(nvme_qpair != NULL);

	ctrlr_ch = nvme_qpair->ctrlr_ch;
	assert(ctrlr_ch != NULL);

	/* Releases the reference taken in _bdev_nvme_add_io_path(). */
	ch = spdk_io_channel_from_ctx(ctrlr_ch);
	spdk_put_io_channel(ch);

	/* After an io_path is removed, I/Os submitted to it may complete and update statistics
	 * of the io_path. To avoid heap-use-after-free error from this case, do not free the
	 * io_path here but free the io_path when the associated qpair is freed. It is ensured
	 * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
	 */
}
749 :
750 : static void
751 22 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
752 : {
753 : struct nvme_io_path *io_path, *tmp_io_path;
754 :
755 55 : STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
756 33 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
757 : }
758 22 : }
759 :
/* io_device channel-create callback for an nvme_bdev: initialize the channel,
 * copy the bdev's multipath policy, and build one io_path per namespace.
 * On failure, any paths created so far are rolled back. Returns 0 or -errno.
 */
static int
bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
{
	struct nvme_bdev_channel *nbdev_ch = ctx_buf;
	struct nvme_bdev *nbdev = io_device;
	struct nvme_ns *nvme_ns;
	int rc;

	STAILQ_INIT(&nbdev_ch->io_path_list);
	TAILQ_INIT(&nbdev_ch->retry_io_list);

	pthread_mutex_lock(&nbdev->mutex);

	/* Snapshot the bdev's multipath settings into the channel. */
	nbdev_ch->mp_policy = nbdev->mp_policy;
	nbdev_ch->mp_selector = nbdev->mp_selector;
	nbdev_ch->rr_min_io = nbdev->rr_min_io;

	TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
		rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
		if (rc != 0) {
			/* Unlock before rollback; path deletion takes other locks. */
			pthread_mutex_unlock(&nbdev->mutex);

			_bdev_nvme_delete_io_paths(nbdev_ch);
			return rc;
		}
	}
	pthread_mutex_unlock(&nbdev->mutex);

	return 0;
}
790 :
/* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
 * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
 */
static inline void
__bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
			const struct spdk_nvme_cpl *cpl)
{
	/* Trace completion paired with the submit-side trace record. */
	spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
			  (uintptr_t)bdev_io);
	if (cpl) {
		spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
	} else {
		spdk_bdev_io_complete(bdev_io, status);
	}
}
806 :
807 : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
808 :
/* io_device channel-destroy callback: abort queued retries, then drop paths. */
static void
bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
{
	struct nvme_bdev_channel *nbdev_ch = ctx_buf;

	bdev_nvme_abort_retry_ios(nbdev_ch);
	_bdev_nvme_delete_io_paths(nbdev_ch);
}
817 :
818 : static inline bool
819 58 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
820 : {
821 58 : switch (io_type) {
822 5 : case SPDK_BDEV_IO_TYPE_RESET:
823 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
824 : case SPDK_BDEV_IO_TYPE_ABORT:
825 5 : return true;
826 53 : default:
827 53 : break;
828 : }
829 :
830 53 : return false;
831 : }
832 :
833 : static inline bool
834 90 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
835 : {
836 90 : if (spdk_unlikely(nvme_ns->ana_state_updating)) {
837 1 : return false;
838 : }
839 :
840 89 : if (spdk_unlikely(nvme_ns->ns == NULL)) {
841 0 : return false;
842 : }
843 :
844 89 : return true;
845 : }
846 :
847 : static inline bool
848 78 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
849 : {
850 78 : if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
851 1 : return false;
852 : }
853 :
854 77 : switch (nvme_ns->ana_state) {
855 68 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
856 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
857 68 : return true;
858 9 : default:
859 9 : break;
860 : }
861 :
862 9 : return false;
863 : }
864 :
865 : static inline bool
866 117 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
867 : {
868 117 : if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
869 20 : return false;
870 : }
871 :
872 97 : if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
873 : SPDK_NVME_QPAIR_FAILURE_NONE)) {
874 2 : return false;
875 : }
876 :
877 95 : if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
878 0 : return false;
879 : }
880 :
881 95 : return true;
882 : }
883 :
884 : static inline bool
885 92 : nvme_io_path_is_available(struct nvme_io_path *io_path)
886 : {
887 92 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
888 14 : return false;
889 : }
890 :
891 78 : if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
892 10 : return false;
893 : }
894 :
895 68 : return true;
896 : }
897 :
/* Decide whether a controller should be treated as failed for I/O purposes.
 * Check order matters: a controller that is resetting with a reconnect delay
 * configured, or whose reconnect is merely delayed, is NOT failed yet.
 */
static inline bool
nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
{
	if (nvme_ctrlr->destruct) {
		return true;
	}

	if (nvme_ctrlr->fast_io_fail_timedout) {
		return true;
	}

	if (nvme_ctrlr->resetting) {
		if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
			/* Reconnect will be retried; not failed yet. */
			return false;
		} else {
			return true;
		}
	}

	if (nvme_ctrlr->reconnect_is_delayed) {
		return false;
	}

	if (nvme_ctrlr->disabled) {
		return true;
	}

	if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
		return true;
	} else {
		return false;
	}
}
931 :
932 : static bool
933 20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
934 : {
935 20 : if (nvme_ctrlr->destruct) {
936 0 : return false;
937 : }
938 :
939 20 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
940 3 : return false;
941 : }
942 :
943 17 : if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
944 1 : return false;
945 : }
946 :
947 16 : if (nvme_ctrlr->disabled) {
948 0 : return false;
949 : }
950 :
951 16 : return true;
952 : }
953 :
954 : /* Simulate circular linked list. */
955 : static inline struct nvme_io_path *
956 87 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
957 : {
958 : struct nvme_io_path *next_path;
959 :
960 87 : if (prev_path != NULL) {
961 37 : next_path = STAILQ_NEXT(prev_path, stailq);
962 37 : if (next_path != NULL) {
963 14 : return next_path;
964 : }
965 : }
966 :
967 73 : return STAILQ_FIRST(&nbdev_ch->io_path_list);
968 : }
969 :
/* Select an I/O path by walking the channel's path list once, circularly,
 * starting after the cached path. Prefers and caches an ANA-optimized path;
 * otherwise falls back to the first available non-optimized path (cached only
 * under the active-active policy). Returns NULL when no path is available.
 */
static struct nvme_io_path *
_bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
{
	struct nvme_io_path *io_path, *start, *non_optimized = NULL;

	start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);

	io_path = start;
	do {
		if (spdk_likely(nvme_io_path_is_available(io_path))) {
			switch (io_path->nvme_ns->ana_state) {
			case SPDK_NVME_ANA_OPTIMIZED_STATE:
				/* Best case: cache and return immediately. */
				nbdev_ch->current_io_path = io_path;
				return io_path;
			case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
				if (non_optimized == NULL) {
					non_optimized = io_path;
				}
				break;
			default:
				/* nvme_io_path_is_available() filters other states. */
				assert(false);
				break;
			}
		}
		io_path = nvme_io_path_get_next(nbdev_ch, io_path);
	} while (io_path != start);

	if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
		/* We come here only if there is no optimized path. Cache even non_optimized
		 * path for load balance across multiple non_optimized paths.
		 */
		nbdev_ch->current_io_path = non_optimized;
	}

	return non_optimized;
}
1006 :
1007 : static struct nvme_io_path *
1008 4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
1009 : {
1010 : struct nvme_io_path *io_path;
1011 4 : struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
1012 4 : uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
1013 : uint32_t num_outstanding_reqs;
1014 :
1015 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1016 12 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
1017 : /* The device is currently resetting. */
1018 0 : continue;
1019 : }
1020 :
1021 12 : if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
1022 0 : continue;
1023 : }
1024 :
1025 12 : num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
1026 12 : switch (io_path->nvme_ns->ana_state) {
1027 6 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1028 6 : if (num_outstanding_reqs < opt_min_qd) {
1029 5 : opt_min_qd = num_outstanding_reqs;
1030 5 : optimized = io_path;
1031 : }
1032 6 : break;
1033 3 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1034 3 : if (num_outstanding_reqs < non_opt_min_qd) {
1035 3 : non_opt_min_qd = num_outstanding_reqs;
1036 3 : non_optimized = io_path;
1037 : }
1038 3 : break;
1039 3 : default:
1040 3 : break;
1041 : }
1042 : }
1043 :
1044 : /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
1045 4 : if (optimized != NULL) {
1046 3 : return optimized;
1047 : }
1048 :
1049 1 : return non_optimized;
1050 : }
1051 :
1052 : static inline struct nvme_io_path *
1053 95 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
1054 : {
1055 95 : if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
1056 41 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
1057 31 : return nbdev_ch->current_io_path;
1058 10 : } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1059 10 : if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
1060 3 : return nbdev_ch->current_io_path;
1061 : }
1062 7 : nbdev_ch->rr_counter = 0;
1063 : }
1064 : }
1065 :
1066 61 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
1067 14 : nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1068 57 : return _bdev_nvme_find_io_path(nbdev_ch);
1069 : } else {
1070 4 : return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
1071 : }
1072 : }
1073 :
1074 : /* Return true if there is any io_path whose qpair is active or ctrlr is not failed,
1075 : * or false otherwise.
1076 : *
1077 : * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
1078 : * is likely to be non-accessible now but may become accessible.
1079 : *
1080 : * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
1081 : * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
1082 : * when starting to reset it but it is set to failed when the reset failed. Hence, if
1083 : * a ctrlr is unfailed, it is likely that it works fine or is resetting.
1084 : */
1085 : static bool
1086 13 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
1087 : {
1088 : struct nvme_io_path *io_path;
1089 :
1090 15 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1091 13 : if (io_path->nvme_ns->ana_transition_timedout) {
1092 0 : continue;
1093 : }
1094 :
1095 13 : if (nvme_qpair_is_connected(io_path->qpair) ||
1096 8 : !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
1097 11 : return true;
1098 : }
1099 : }
1100 :
1101 2 : return false;
1102 : }
1103 :
/* Resubmit a bdev I/O that was queued for retry. If the path the I/O was
 * originally on is still usable, submit directly on this channel; otherwise
 * go through the full submit entry point so a fresh path is selected.
 */
static void
bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
{
	struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
	struct spdk_io_channel *ch;

	if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
		_bdev_nvme_submit_request(nbdev_ch, bdev_io);
	} else {
		ch = spdk_io_channel_from_ctx(nbdev_ch);
		bdev_nvme_submit_request(ch, bdev_io);
	}
}
1117 :
/* Retry poller: submit every queued retry I/O whose deadline has passed.
 * The retry list is kept sorted by ascending retry_ticks (see
 * bdev_nvme_queue_retry_io()), so the scan stops at the first entry that is
 * still in the future. The poller is then re-armed to fire when the new head
 * of the list, if any, becomes due.
 */
static int
bdev_nvme_retry_ios(void *arg)
{
	struct nvme_bdev_channel *nbdev_ch = arg;
	struct spdk_bdev_io *bdev_io, *tmp_bdev_io;
	struct nvme_bdev_io *bio;
	uint64_t now, delay_us;

	now = spdk_get_ticks();

	TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_bdev_io) {
		bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
		if (bio->retry_ticks > now) {
			/* Sorted list: every later entry is also in the future. */
			break;
		}

		TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);

		bdev_nvme_retry_io(nbdev_ch, bdev_io);
	}

	spdk_poller_unregister(&nbdev_ch->retry_io_poller);

	bdev_io = TAILQ_FIRST(&nbdev_ch->retry_io_list);
	if (bdev_io != NULL) {
		bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;

		/* Re-arm for the remaining wait time of the new head. */
		delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();

		nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
					    delay_us);
	}

	return SPDK_POLLER_BUSY;
}
1153 :
/* Queue @bio to be retried after @delay_ms milliseconds.
 * The retry list is kept sorted by ascending retry_ticks: walk backwards from
 * the tail and insert after the first entry whose deadline is not later than
 * ours. If the new I/O becomes the list head, re-arm the retry poller so it
 * fires at this I/O's deadline.
 */
static void
bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
			 struct nvme_bdev_io *bio, uint64_t delay_ms)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	struct spdk_bdev_io *tmp_bdev_io;
	struct nvme_bdev_io *tmp_bio;

	/* Convert the relative delay to an absolute tick deadline. */
	bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;

	TAILQ_FOREACH_REVERSE(tmp_bdev_io, &nbdev_ch->retry_io_list, retry_io_head, module_link) {
		tmp_bio = (struct nvme_bdev_io *)tmp_bdev_io->driver_ctx;

		if (tmp_bio->retry_ticks <= bio->retry_ticks) {
			TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bdev_io, bdev_io,
					   module_link);
			return;
		}
	}

	/* No earlier I/Os were found. This I/O must be the new head. */
	TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bdev_io, module_link);

	spdk_poller_unregister(&nbdev_ch->retry_io_poller);

	nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
				    delay_ms * 1000ULL);
}
1182 :
1183 : static void
1184 36 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
1185 : {
1186 : struct spdk_bdev_io *bdev_io, *tmp_io;
1187 :
1188 36 : TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_io) {
1189 0 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
1190 0 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1191 : }
1192 :
1193 36 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1194 36 : }
1195 :
1196 : static int
1197 6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
1198 : struct nvme_bdev_io *bio_to_abort)
1199 : {
1200 : struct spdk_bdev_io *bdev_io_to_abort;
1201 :
1202 6 : TAILQ_FOREACH(bdev_io_to_abort, &nbdev_ch->retry_io_list, module_link) {
1203 1 : if ((struct nvme_bdev_io *)bdev_io_to_abort->driver_ctx == bio_to_abort) {
1204 1 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io_to_abort, module_link);
1205 1 : __bdev_nvme_io_complete(bdev_io_to_abort, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1206 1 : return 0;
1207 : }
1208 : }
1209 :
1210 5 : return -ENOENT;
1211 : }
1212 :
1213 : static void
1214 12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
1215 : {
1216 : struct nvme_bdev *nbdev;
1217 : uint16_t sct, sc;
1218 :
1219 12 : assert(spdk_nvme_cpl_is_error(cpl));
1220 :
1221 12 : nbdev = bdev_io->bdev->ctxt;
1222 :
1223 12 : if (nbdev->err_stat == NULL) {
1224 12 : return;
1225 : }
1226 :
1227 0 : sct = cpl->status.sct;
1228 0 : sc = cpl->status.sc;
1229 :
1230 0 : pthread_mutex_lock(&nbdev->mutex);
1231 :
1232 0 : nbdev->err_stat->status_type[sct]++;
1233 0 : switch (sct) {
1234 0 : case SPDK_NVME_SCT_GENERIC:
1235 : case SPDK_NVME_SCT_COMMAND_SPECIFIC:
1236 : case SPDK_NVME_SCT_MEDIA_ERROR:
1237 : case SPDK_NVME_SCT_PATH:
1238 0 : nbdev->err_stat->status[sct][sc]++;
1239 0 : break;
1240 0 : default:
1241 0 : break;
1242 : }
1243 :
1244 0 : pthread_mutex_unlock(&nbdev->mutex);
1245 : }
1246 :
1247 : static inline void
1248 20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
1249 : {
1250 20 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1251 20 : uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
1252 20 : uint32_t blocklen = bdev_io->bdev->blocklen;
1253 : struct spdk_bdev_io_stat *stat;
1254 : uint64_t tsc_diff;
1255 :
1256 20 : if (bio->io_path->stat == NULL) {
1257 20 : return;
1258 : }
1259 :
1260 0 : tsc_diff = spdk_get_ticks() - bio->submit_tsc;
1261 0 : stat = bio->io_path->stat;
1262 :
1263 0 : switch (bdev_io->type) {
1264 0 : case SPDK_BDEV_IO_TYPE_READ:
1265 0 : stat->bytes_read += num_blocks * blocklen;
1266 0 : stat->num_read_ops++;
1267 0 : stat->read_latency_ticks += tsc_diff;
1268 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1269 0 : stat->max_read_latency_ticks = tsc_diff;
1270 : }
1271 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1272 0 : stat->min_read_latency_ticks = tsc_diff;
1273 : }
1274 0 : break;
1275 0 : case SPDK_BDEV_IO_TYPE_WRITE:
1276 0 : stat->bytes_written += num_blocks * blocklen;
1277 0 : stat->num_write_ops++;
1278 0 : stat->write_latency_ticks += tsc_diff;
1279 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1280 0 : stat->max_write_latency_ticks = tsc_diff;
1281 : }
1282 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1283 0 : stat->min_write_latency_ticks = tsc_diff;
1284 : }
1285 0 : break;
1286 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
1287 0 : stat->bytes_unmapped += num_blocks * blocklen;
1288 0 : stat->num_unmap_ops++;
1289 0 : stat->unmap_latency_ticks += tsc_diff;
1290 0 : if (stat->max_unmap_latency_ticks < tsc_diff) {
1291 0 : stat->max_unmap_latency_ticks = tsc_diff;
1292 : }
1293 0 : if (stat->min_unmap_latency_ticks > tsc_diff) {
1294 0 : stat->min_unmap_latency_ticks = tsc_diff;
1295 : }
1296 0 : break;
1297 0 : case SPDK_BDEV_IO_TYPE_ZCOPY:
1298 : /* Track the data in the start phase only */
1299 0 : if (!bdev_io->u.bdev.zcopy.start) {
1300 0 : break;
1301 : }
1302 0 : if (bdev_io->u.bdev.zcopy.populate) {
1303 0 : stat->bytes_read += num_blocks * blocklen;
1304 0 : stat->num_read_ops++;
1305 0 : stat->read_latency_ticks += tsc_diff;
1306 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1307 0 : stat->max_read_latency_ticks = tsc_diff;
1308 : }
1309 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1310 0 : stat->min_read_latency_ticks = tsc_diff;
1311 : }
1312 : } else {
1313 0 : stat->bytes_written += num_blocks * blocklen;
1314 0 : stat->num_write_ops++;
1315 0 : stat->write_latency_ticks += tsc_diff;
1316 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1317 0 : stat->max_write_latency_ticks = tsc_diff;
1318 : }
1319 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1320 0 : stat->min_write_latency_ticks = tsc_diff;
1321 : }
1322 : }
1323 0 : break;
1324 0 : case SPDK_BDEV_IO_TYPE_COPY:
1325 0 : stat->bytes_copied += num_blocks * blocklen;
1326 0 : stat->num_copy_ops++;
1327 0 : stat->copy_latency_ticks += tsc_diff;
1328 0 : if (stat->max_copy_latency_ticks < tsc_diff) {
1329 0 : stat->max_copy_latency_ticks = tsc_diff;
1330 : }
1331 0 : if (stat->min_copy_latency_ticks > tsc_diff) {
1332 0 : stat->min_copy_latency_ticks = tsc_diff;
1333 : }
1334 0 : break;
1335 0 : default:
1336 0 : break;
1337 : }
1338 : }
1339 :
/* Decide whether a failed I/O should be retried, and compute the retry delay.
 *
 * Path-related failures (NVMe path error, aborted SQ deletion, or an
 * unavailable io_path/ctrlr) clear the cached path so the retry picks a new
 * one, kick off an ANA log page read for ANA errors, and retry immediately —
 * but only if some path may still become available. Any other failure is
 * retried on the same path, honoring the Command Retry Delay advertised by
 * the controller when the completion requests one (crd != 0).
 *
 * Returns true to retry (with *_delay_ms set), false to fail the I/O.
 */
static bool
bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
			 const struct spdk_nvme_cpl *cpl,
			 struct nvme_bdev_channel *nbdev_ch,
			 uint64_t *_delay_ms)
{
	struct nvme_io_path *io_path = bio->io_path;
	struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;

	if (spdk_nvme_cpl_is_path_error(cpl) ||
	    spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
	    !nvme_io_path_is_available(io_path) ||
	    !nvme_ctrlr_is_available(nvme_ctrlr)) {
		bdev_nvme_clear_current_io_path(nbdev_ch);
		bio->io_path = NULL;
		if (spdk_nvme_cpl_is_ana_error(cpl)) {
			if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
				/* Log page read started; mark the namespace's ANA
				 * state as in-flux until it completes. */
				io_path->nvme_ns->ana_state_updating = true;
			}
		}
		if (!any_io_path_may_become_available(nbdev_ch)) {
			return false;
		}
		*_delay_ms = 0;
	} else {
		bio->retry_count++;

		cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);

		if (cpl->status.crd != 0) {
			/* CRDT values are in units of 100 milliseconds (NVMe spec). */
			*_delay_ms = cdata->crdt[cpl->status.crd] * 100;
		} else {
			*_delay_ms = 0;
		}
	}

	return true;
}
1379 :
/* Complete an I/O that finished with NVMe completion @cpl.
 * Success updates per-path statistics. Errors update the per-bdev NVMe error
 * counters and are retried unless retry is forbidden: DNR set, aborted by
 * request, retry budget exhausted, or an accel sequence whose execution state
 * is unknown.
 */
static inline void
bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
				  const struct spdk_nvme_cpl *cpl)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	struct nvme_bdev_channel *nbdev_ch;
	uint64_t delay_ms;

	assert(!bdev_nvme_io_type_is_admin(bdev_io->type));

	if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
		bdev_nvme_update_io_path_stat(bio);
		goto complete;
	}

	/* Update error counts before deciding if retry is needed.
	 * Hence, error counts may be more than the number of I/O errors.
	 */
	bdev_nvme_update_nvme_error_stat(bdev_io, cpl);

	if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
	    (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
		goto complete;
	}

	/* At this point we don't know whether the sequence was successfully executed or not, so we
	 * cannot retry the IO */
	if (bdev_io->u.bdev.accel_sequence != NULL) {
		goto complete;
	}

	nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));

	if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
		bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
		return;
	}

complete:
	bio->retry_count = 0;
	bio->submit_tsc = 0;
	bdev_io->u.bdev.accel_sequence = NULL;
	/* NOTE(review): status argument 0 with a non-NULL cpl — presumably
	 * __bdev_nvme_io_complete() derives the final bdev status from cpl;
	 * confirm at its definition. */
	__bdev_nvme_io_complete(bdev_io, 0, cpl);
}
1424 :
/* Complete an I/O that ended with a generic errno @rc rather than an NVMe
 * completion. -ENXIO (no usable path) requeues the I/O for retry after one
 * second, provided the retry budget allows it and some path may become
 * available again; otherwise it falls through and fails, aborting any
 * pending accel sequence.
 */
static inline void
bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	struct nvme_bdev_channel *nbdev_ch;
	enum spdk_bdev_io_status io_status;

	assert(!bdev_nvme_io_type_is_admin(bdev_io->type));

	switch (rc) {
	case 0:
		io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
		break;
	case -ENOMEM:
		io_status = SPDK_BDEV_IO_STATUS_NOMEM;
		break;
	case -ENXIO:
		if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
			nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));

			/* Force a fresh path selection on the retry. */
			bdev_nvme_clear_current_io_path(nbdev_ch);
			bio->io_path = NULL;

			if (any_io_path_may_become_available(nbdev_ch)) {
				bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
				return;
			}
		}

	/* fallthrough */
	default:
		spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
		bdev_io->u.bdev.accel_sequence = NULL;
		io_status = SPDK_BDEV_IO_STATUS_FAILED;
		break;
	}

	bio->retry_count = 0;
	bio->submit_tsc = 0;
	__bdev_nvme_io_complete(bdev_io, io_status, NULL);
}
1466 :
1467 : static inline void
1468 4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
1469 : {
1470 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1471 : enum spdk_bdev_io_status io_status;
1472 :
1473 4 : switch (rc) {
1474 1 : case 0:
1475 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1476 1 : break;
1477 0 : case -ENOMEM:
1478 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1479 0 : break;
1480 3 : case -ENXIO:
1481 : /* fallthrough */
1482 : default:
1483 3 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1484 3 : break;
1485 : }
1486 :
1487 4 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1488 4 : }
1489 :
/* spdk_for_each_channel() completion callback for
 * bdev_nvme_clear_io_path_caches(): clear the in-progress flag and, if the
 * ctrlr became eligible for unregistration while the sweep was running,
 * finish the unregistration (outside the ctrlr mutex).
 */
static void
bdev_nvme_clear_io_path_caches_done(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);

	pthread_mutex_lock(&nvme_ctrlr->mutex);

	assert(nvme_ctrlr->io_path_cache_clearing == true);
	nvme_ctrlr->io_path_cache_clearing = false;

	if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return;
	}

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	nvme_ctrlr_unregister(nvme_ctrlr);
}
1509 :
1510 : static void
1511 320 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
1512 : {
1513 : struct nvme_io_path *io_path;
1514 :
1515 459 : TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
1516 139 : if (io_path->nbdev_ch == NULL) {
1517 64 : continue;
1518 : }
1519 75 : bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
1520 : }
1521 320 : }
1522 :
/* Per-channel callback of bdev_nvme_clear_io_path_caches(): invalidate the
 * cached paths that go through this ctrlr channel's qpair, then continue the
 * channel iteration.
 */
static void
bdev_nvme_clear_io_path_cache(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);

	assert(ctrlr_ch->qpair != NULL);

	_bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);

	spdk_for_each_channel_continue(i, 0);
}
1535 :
/* Invalidate the cached I/O path on every channel of @nvme_ctrlr.
 * No-op if the ctrlr is unavailable or a clearing sweep is already in flight;
 * io_path_cache_clearing guards against concurrent sweeps and is cleared in
 * bdev_nvme_clear_io_path_caches_done().
 */
static void
bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
{
	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
	    nvme_ctrlr->io_path_cache_clearing) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return;
	}

	nvme_ctrlr->io_path_cache_clearing = true;
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	spdk_for_each_channel(nvme_ctrlr,
			      bdev_nvme_clear_io_path_cache,
			      NULL,
			      bdev_nvme_clear_io_path_caches_done);
}
1554 :
1555 : static struct nvme_qpair *
1556 99 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
1557 : {
1558 : struct nvme_qpair *nvme_qpair;
1559 :
1560 108 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1561 108 : if (nvme_qpair->qpair == qpair) {
1562 99 : break;
1563 : }
1564 : }
1565 :
1566 99 : return nvme_qpair;
1567 : }
1568 :
1569 : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
1570 :
/* Poll-group callback invoked when a qpair has finished disconnecting.
 * Frees the raw qpair, invalidates cached paths through it, and then:
 *  - if a full ctrlr reset is in progress (reset_iter set), advances the
 *    reset channel iteration (failing it if the qpair never reconnected);
 *  - if the disconnect was unexpected, starts a ctrlr failover;
 *  - if the ctrlr channel is already gone, deletes the nvme_qpair itself.
 */
static void
bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
{
	struct nvme_poll_group *group = poll_group_ctx;
	struct nvme_qpair *nvme_qpair;
	struct nvme_ctrlr_channel *ctrlr_ch;
	int status;

	nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
	if (nvme_qpair == NULL) {
		return;
	}

	if (nvme_qpair->qpair != NULL) {
		spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
		nvme_qpair->qpair = NULL;
	}

	_bdev_nvme_clear_io_path_cache(nvme_qpair);

	ctrlr_ch = nvme_qpair->ctrlr_ch;

	if (ctrlr_ch != NULL) {
		if (ctrlr_ch->reset_iter != NULL) {
			/* We are in a full reset sequence. */
			if (ctrlr_ch->connect_poller != NULL) {
				/* qpair was failed to connect. Abort the reset sequence. */
				SPDK_DEBUGLOG(bdev_nvme, "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
					      qpair);
				spdk_poller_unregister(&ctrlr_ch->connect_poller);
				status = -1;
			} else {
				/* qpair was completed to disconnect. Just move to the next ctrlr_channel. */
				SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
					      qpair);
				status = 0;
			}
			spdk_for_each_channel_continue(ctrlr_ch->reset_iter, status);
			ctrlr_ch->reset_iter = NULL;
		} else {
			/* qpair was disconnected unexpectedly. Reset controller for recovery. */
			SPDK_NOTICELOG("qpair %p was disconnected and freed. reset controller.\n", qpair);
			bdev_nvme_failover_ctrlr(nvme_qpair->ctrlr);
		}
	} else {
		/* In this case, ctrlr_channel is already deleted. */
		SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed. delete nvme_qpair.\n", qpair);
		nvme_qpair_delete(nvme_qpair);
	}
}
1621 :
1622 : static void
1623 0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
1624 : {
1625 : struct nvme_qpair *nvme_qpair;
1626 :
1627 0 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1628 0 : if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
1629 0 : continue;
1630 : }
1631 :
1632 0 : if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
1633 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1634 0 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1635 : }
1636 : }
1637 0 : }
1638 :
/* Poller for an nvme_poll_group: process completions on all of its qpairs.
 * When spin-stat collection is enabled, ticks spent polling without any
 * completion are accumulated into group->spin_ticks. A negative completion
 * count indicates a poll-group error; re-check each qpair's health then.
 */
static int
bdev_nvme_poll(void *arg)
{
	struct nvme_poll_group *group = arg;
	int64_t num_completions;

	if (group->collect_spin_stat && group->start_ticks == 0) {
		group->start_ticks = spdk_get_ticks();
	}

	num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
			  bdev_nvme_disconnected_qpair_cb);
	if (group->collect_spin_stat) {
		if (num_completions > 0) {
			/* Work arrived: close out any idle-spin interval. */
			if (group->end_ticks != 0) {
				group->spin_ticks += (group->end_ticks - group->start_ticks);
				group->end_ticks = 0;
			}
			group->start_ticks = 0;
		} else {
			group->end_ticks = spdk_get_ticks();
		}
	}

	if (spdk_unlikely(num_completions < 0)) {
		bdev_nvme_check_io_qpairs(group);
	}

	return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}
1669 :
1670 : static int bdev_nvme_poll_adminq(void *arg);
1671 :
/* Re-register the ctrlr's admin-queue poller with a new polling period of
 * @new_period_us microseconds (0 means poll every reactor iteration). */
static void
bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
{
	spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);

	nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
					  nvme_ctrlr, new_period_us);
}
1680 :
/* Admin-queue poller. A negative return from completion processing means the
 * admin qpair disconnected: run the deferred callback installed by
 * nvme_ctrlr_disconnect() (restoring the normal poll period first), or start
 * a failover if no callback is pending. An admin qpair that is connected but
 * reports a failure reason only invalidates the cached I/O paths.
 */
static int
bdev_nvme_poll_adminq(void *arg)
{
	int32_t rc;
	struct nvme_ctrlr *nvme_ctrlr = arg;
	nvme_ctrlr_disconnected_cb disconnected_cb;

	assert(nvme_ctrlr != NULL);

	rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
	if (rc < 0) {
		disconnected_cb = nvme_ctrlr->disconnected_cb;
		nvme_ctrlr->disconnected_cb = NULL;

		if (disconnected_cb != NULL) {
			/* Disconnect completed: go back to the configured poll period
			 * (it was shortened to 0 while waiting for the disconnect). */
			bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
							    g_opts.nvme_adminq_poll_period_us);
			disconnected_cb(nvme_ctrlr);
		} else {
			bdev_nvme_failover_ctrlr(nvme_ctrlr);
		}
	} else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
		   SPDK_NVME_QPAIR_FAILURE_NONE) {
		bdev_nvme_clear_io_path_caches(nvme_ctrlr);
	}

	return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
}
1709 :
/* io_device destroy callback: release all memory owned by an nvme_bdev. */
static void
nvme_bdev_free(void *io_device)
{
	struct nvme_bdev *nvme_disk = io_device;

	pthread_mutex_destroy(&nvme_disk->mutex);
	free(nvme_disk->disk.name);
	free(nvme_disk->err_stat);
	free(nvme_disk);
}
1720 :
/* bdev module destruct callback for an nvme_bdev.
 * Detaches every namespace from the bdev; a namespace that has already
 * disappeared from its ctrlr is freed here and its ctrlr reference dropped.
 * Finally the bdev is removed from its parent nbdev_ctrlr and its io_device
 * unregistered (freed asynchronously via nvme_bdev_free()).
 */
static int
bdev_nvme_destruct(void *ctx)
{
	struct nvme_bdev *nvme_disk = ctx;
	struct nvme_ns *nvme_ns, *tmp_nvme_ns;

	SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);

	TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
		pthread_mutex_lock(&nvme_ns->ctrlr->mutex);

		nvme_ns->bdev = NULL;

		assert(nvme_ns->id > 0);

		if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
			/* The namespace is gone from the ctrlr; the bdev held the
			 * last reference to it. */
			pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);

			nvme_ctrlr_release(nvme_ns->ctrlr);
			nvme_ns_free(nvme_ns);
		} else {
			pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
		}
	}

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
	pthread_mutex_unlock(&g_bdev_nvme_mutex);

	spdk_io_device_unregister(nvme_disk, nvme_bdev_free);

	return 0;
}
1754 :
/* Allocate an I/O qpair for @nvme_qpair, add it to the qpair's poll group,
 * and connect it. On success the qpair is stored in nvme_qpair->qpair and
 * cached I/O paths are invalidated (unless auto-failback is disabled) so I/O
 * can fail back to this path. Returns 0 on success, negative on failure.
 */
static int
bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
{
	struct nvme_ctrlr *nvme_ctrlr;
	struct spdk_nvme_io_qpair_opts opts;
	struct spdk_nvme_qpair *qpair;
	int rc;

	nvme_ctrlr = nvme_qpair->ctrlr;

	spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
	opts.delay_cmd_submit = g_opts.delay_cmd_submit;
	opts.create_only = true;
	opts.async_mode = true;
	opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
	/* Publish the effective queue depth back to the global options. */
	g_opts.io_queue_requests = opts.io_queue_requests;

	qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
	if (qpair == NULL) {
		return -1;
	}

	SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
			   spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));

	assert(nvme_qpair->group != NULL);

	rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n");
		goto err;
	}

	rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to connect I/O qpair.\n");
		goto err;
	}

	nvme_qpair->qpair = qpair;

	if (!g_opts.disable_auto_failback) {
		/* Invalidate cached paths so channels may fail back to this qpair. */
		_bdev_nvme_clear_io_path_cache(nvme_qpair);
	}

	return 0;

err:
	spdk_nvme_ctrlr_free_io_qpair(qpair);

	return rc;
}
1807 :
1808 : static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);
1809 :
/* Per-channel step of a reset sequence: continue every reset I/O that was
 * queued on this ctrlr channel while another reset was in progress. The
 * iterator context is non-NULL when the just-finished reset failed, in which
 * case the queued resets are continued with an error.
 */
static void
bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
	int rc = 0;
	struct spdk_bdev_io *bdev_io;
	struct nvme_bdev_io *bio;

	if (spdk_io_channel_iter_get_ctx(i) != NULL) {
		rc = -1;
	}

	while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
		bdev_io = TAILQ_FIRST(&ctrlr_ch->pending_resets);
		TAILQ_REMOVE(&ctrlr_ch->pending_resets, bdev_io, module_link);

		bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
		bdev_nvme_reset_io_continue(bio, rc);
	}

	spdk_for_each_channel_continue(i, 0);
}
1833 :
1834 : /* This function marks the current trid as failed by storing the current ticks
1835 : * and then sets the next trid to the active trid within a controller if exists.
1836 : *
1837 : * The purpose of the boolean return value is to request the caller to disconnect
1838 : * the current trid now to try connecting the next trid.
1839 : */
static bool
bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
{
	struct nvme_path_id *path_id, *next_path;
	int rc __attribute__((unused));

	path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
	assert(path_id);
	assert(path_id == nvme_ctrlr->active_path_id);
	next_path = TAILQ_NEXT(path_id, link);

	/* Update the last failed time. It means the trid is failed if its last
	 * failed time is non-zero.
	 */
	path_id->last_failed_tsc = spdk_get_ticks();

	if (next_path == NULL) {
		/* There is no alternate trid within a controller. */
		return false;
	}

	if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
		/* Connect is not retried in a controller reset sequence. Connecting
		 * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
		 */
		return false;
	}

	/* Multiple trids per ctrlr only exist for fabrics transports. */
	assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);

	SPDK_NOTICELOG("Start failover from %s:%s to %s:%s\n", path_id->trid.traddr,
		       path_id->trid.trsvcid, next_path->trid.traddr, next_path->trid.trsvcid);

	/* Fail the current connection and switch the ctrlr to the next trid. */
	spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
	nvme_ctrlr->active_path_id = next_path;
	rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
	assert(rc == 0);
	TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
	if (!remove) {
		/** Shuffle the old trid to the end of the list and use the new one.
		 * Allows for round robin through multiple connections.
		 */
		TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
	} else {
		free(path_id);
	}

	if (start || next_path->last_failed_tsc == 0) {
		/* bdev_nvme_failover_ctrlr() is just called or the next trid is not failed
		 * or used yet. Try the next trid now.
		 */
		return true;
	}

	if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
	    nvme_ctrlr->opts.reconnect_delay_sec) {
		/* Enough backoff passed since the next trid failed. Try the next trid now. */
		return true;
	}

	/* The next trid will be tried after reconnect_delay_sec seconds. */
	return false;
}
1903 :
1904 : static bool
1905 68 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
1906 : {
1907 : int32_t elapsed;
1908 :
1909 68 : if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
1910 36 : nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
1911 42 : return false;
1912 : }
1913 :
1914 26 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
1915 26 : if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
1916 6 : return true;
1917 : } else {
1918 20 : return false;
1919 : }
1920 : }
1921 :
1922 : static bool
1923 12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
1924 : {
1925 : uint32_t elapsed;
1926 :
1927 12 : if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
1928 8 : return false;
1929 : }
1930 :
1931 4 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
1932 4 : if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
1933 2 : return true;
1934 : } else {
1935 2 : return false;
1936 : }
1937 : }
1938 :
1939 : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
1940 :
1941 : static void
1942 51 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
1943 : {
1944 : int rc;
1945 :
1946 51 : rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
1947 51 : if (rc != 0) {
1948 : /* Disconnect fails if ctrlr is already resetting or removed. In this case,
1949 : * fail the reset sequence immediately.
1950 : */
1951 1 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
1952 1 : return;
1953 : }
1954 :
1955 : /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
1956 : * Set callback here to execute the specified operation after ctrlr is really disconnected.
1957 : */
1958 50 : assert(nvme_ctrlr->disconnected_cb == NULL);
1959 50 : nvme_ctrlr->disconnected_cb = cb_fn;
1960 :
1961 : /* During disconnection, reduce the period to poll adminq more often. */
1962 50 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
1963 : }
1964 :
/* Follow-up action chosen after a reset (or disable) sequence finishes. */
enum bdev_nvme_op_after_reset {
	/* Nothing further to do. */
	OP_NONE,
	/* A destruct was requested while resetting; finish unregistering now. */
	OP_COMPLETE_PENDING_DESTRUCT,
	/* ctrlr_loss_timeout expired; delete the controller. */
	OP_DESTRUCT,
	/* Schedule a reconnect after reconnect_delay_sec seconds. */
	OP_DELAYED_RECONNECT,
	/* A failover was deferred during the reset; start it now. */
	OP_FAILOVER,
};

typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
1974 :
/* Decide the follow-up action after a reset sequence completes.
 *
 * Called with nvme_ctrlr->mutex held. Note the side effects: a pending
 * failover request is consumed here, and reset_start_tsc is cleared when the
 * reset cycle ends (failover, or success/no-retry).
 */
static _bdev_nvme_op_after_reset
bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
{
	if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
		/* Complete pending destruct after reset completes. */
		return OP_COMPLETE_PENDING_DESTRUCT;
	} else if (nvme_ctrlr->pending_failover) {
		/* A failover was deferred while this reset ran; do it next. */
		nvme_ctrlr->pending_failover = false;
		nvme_ctrlr->reset_start_tsc = 0;
		return OP_FAILOVER;
	} else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
		/* Either the reset succeeded, or delayed reconnects are disabled. */
		nvme_ctrlr->reset_start_tsc = 0;
		return OP_NONE;
	} else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
		return OP_DESTRUCT;
	} else {
		if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
			nvme_ctrlr->fast_io_fail_timedout = true;
		}
		return OP_DELAYED_RECONNECT;
	}
}
1997 :
1998 : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
1999 : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
2000 :
/* Timer callback fired reconnect_delay_sec after a failed reset: start the
 * delayed reconnect unless it was cancelled or the controller is being
 * destructed.
 */
static int
bdev_nvme_reconnect_delay_timer_expired(void *ctx)
{
	struct nvme_bdev_io *bio;	/* NOTE(review): unused here */
	struct nvme_ctrlr *nvme_ctrlr = ctx;

	SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
	pthread_mutex_lock(&nvme_ctrlr->mutex);

	spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);

	if (!nvme_ctrlr->reconnect_is_delayed) {
		/* The delayed reconnect was already cancelled elsewhere. */
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return SPDK_POLLER_BUSY;
	}

	nvme_ctrlr->reconnect_is_delayed = false;

	if (nvme_ctrlr->destruct) {
		/* Controller is going away; do not reconnect. */
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return SPDK_POLLER_BUSY;
	}

	assert(nvme_ctrlr->resetting == false);
	nvme_ctrlr->resetting = true;

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* Adminq polling was paused while the reconnect was delayed; resume it. */
	spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);

	bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
	return SPDK_POLLER_BUSY;
}
2033 :
/* Schedule a reconnect attempt after opts.reconnect_delay_sec seconds.
 * Adminq polling stays paused until the timer fires.
 */
static void
bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
{
	spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);

	assert(nvme_ctrlr->reconnect_is_delayed == false);
	nvme_ctrlr->reconnect_is_delayed = true;

	assert(nvme_ctrlr->reconnect_delay_timer == NULL);
	nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
					    nvme_ctrlr,
					    nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
}
2047 :
2048 : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
2049 :
/* Final stage of a controller reset, run on the nvme_ctrlr's thread after
 * every channel has flushed its pending resets. Clears the resetting state,
 * invokes the saved ctrlr-op callback (unless a failover follows), and kicks
 * off whatever follow-up action bdev_nvme_check_op_after_reset() selected.
 */
static void
_bdev_nvme_reset_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
	/* A NULL iterator ctx encodes success (see bdev_nvme_reset_ctrlr_complete()). */
	bool success = spdk_io_channel_iter_get_ctx(i) == NULL;
	bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
	void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
	enum bdev_nvme_op_after_reset op_after_reset;

	assert(nvme_ctrlr->thread == spdk_get_thread());

	nvme_ctrlr->ctrlr_op_cb_fn = NULL;
	nvme_ctrlr->ctrlr_op_cb_arg = NULL;

	if (!success) {
		SPDK_ERRLOG("Resetting controller failed.\n");
	} else {
		SPDK_NOTICELOG("Resetting controller successful.\n");
	}

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	nvme_ctrlr->resetting = false;
	nvme_ctrlr->dont_retry = false;
	nvme_ctrlr->in_failover = false;

	op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* Delay callbacks when the next operation is a failover. */
	if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
		ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
	}

	switch (op_after_reset) {
	case OP_COMPLETE_PENDING_DESTRUCT:
		nvme_ctrlr_unregister(nvme_ctrlr);
		break;
	case OP_DESTRUCT:
		bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
		remove_discovery_entry(nvme_ctrlr);
		break;
	case OP_DELAYED_RECONNECT:
		nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
		break;
	case OP_FAILOVER:
		/* Re-save the callback so the failover's completion invokes it instead. */
		nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
		nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
		bdev_nvme_failover_ctrlr(nvme_ctrlr);
		break;
	default:
		break;
	}
}
2103 :
/* Complete (or continue) a controller reset. On failure, first try to fail
 * over to the next alternate trid; otherwise flush pending resets on every
 * channel and finish in _bdev_nvme_reset_ctrlr_complete().
 */
static void
bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
{
	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (!success) {
		/* Connecting the active trid failed. Set the next alternate trid to the
		 * active trid if it exists.
		 */
		if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
			/* The next alternate trid exists and is ready to try. Try it now. */
			pthread_mutex_unlock(&nvme_ctrlr->mutex);

			nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
			return;
		}

		/* We came here if there is no alternate trid or if the next trid exists but
		 * is not ready to try. We will try the active trid after reconnect_delay_sec
		 * seconds if it is non-zero or at the next reset call otherwise.
		 */
	} else {
		/* Connecting the active trid succeeded. Clear the last failed time because it
		 * means the trid is failed if its last failed time is non-zero.
		 */
		nvme_ctrlr->active_path_id->last_failed_tsc = 0;
	}
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* Make sure we clear any pending resets before returning. The iterator ctx
	 * (NULL on success, non-NULL on failure) carries the outcome to the
	 * completion callback.
	 */
	spdk_for_each_channel(nvme_ctrlr,
			      bdev_nvme_complete_pending_resets,
			      success ? NULL : (void *)0x1,
			      _bdev_nvme_reset_ctrlr_complete);
}
2138 :
2139 : static void
2140 0 : bdev_nvme_reset_create_qpairs_failed(struct spdk_io_channel_iter *i, int status)
2141 : {
2142 0 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2143 :
2144 0 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2145 0 : }
2146 :
/* Per-channel step of a reset: disconnect this channel's I/O qpair. If a
 * disconnect is actually started, the channel iteration is parked on
 * ctrlr_ch->reset_iter and resumed only once the qpair is truly disconnected.
 */
static void
bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
	struct nvme_qpair *nvme_qpair;

	nvme_qpair = ctrlr_ch->qpair;
	assert(nvme_qpair != NULL);

	/* Invalidate any cached I/O path that references this qpair. */
	_bdev_nvme_clear_io_path_cache(nvme_qpair);

	if (nvme_qpair->qpair != NULL) {
		if (nvme_qpair->ctrlr->dont_retry) {
			/* Set DNR on aborts so the affected commands are not retried. */
			spdk_nvme_qpair_set_abort_dnr(nvme_qpair->qpair, true);
		}
		spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);

		/* The current full reset sequence will move to the next
		 * ctrlr_channel after the qpair is actually disconnected.
		 */
		assert(ctrlr_ch->reset_iter == NULL);
		ctrlr_ch->reset_iter = i;
	} else {
		/* Nothing to tear down on this channel; advance immediately. */
		spdk_for_each_channel_continue(i, 0);
	}
}
2174 :
2175 : static void
2176 27 : bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
2177 : {
2178 27 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2179 :
2180 27 : if (status == 0) {
2181 27 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
2182 : } else {
2183 : /* Delete the added qpairs and quiesce ctrlr to make the states clean. */
2184 0 : spdk_for_each_channel(nvme_ctrlr,
2185 : bdev_nvme_reset_destroy_qpair,
2186 : NULL,
2187 : bdev_nvme_reset_create_qpairs_failed);
2188 : }
2189 27 : }
2190 :
/* Poller that waits until this channel's recreated qpair finishes connecting,
 * then advances the parked channel iteration of the reset sequence.
 */
static int
bdev_nvme_reset_check_qpair_connected(void *ctx)
{
	struct nvme_ctrlr_channel *ctrlr_ch = ctx;

	if (ctrlr_ch->reset_iter == NULL) {
		/* qpair was already failed to connect and the reset sequence is being aborted. */
		assert(ctrlr_ch->connect_poller == NULL);
		assert(ctrlr_ch->qpair->qpair == NULL);
		return SPDK_POLLER_BUSY;
	}

	assert(ctrlr_ch->qpair->qpair != NULL);

	if (!spdk_nvme_qpair_is_connected(ctrlr_ch->qpair->qpair)) {
		/* Still connecting; check again on the next poll. */
		return SPDK_POLLER_BUSY;
	}

	spdk_poller_unregister(&ctrlr_ch->connect_poller);

	/* qpair was completed to connect. Move to the next ctrlr_channel */
	spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
	ctrlr_ch->reset_iter = NULL;

	if (!g_opts.disable_auto_failback) {
		/* Drop the cached I/O path so path selection runs again (auto failback). */
		_bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
	}

	return SPDK_POLLER_BUSY;
}
2221 :
2222 : static void
2223 43 : bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
2224 : {
2225 43 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
2226 43 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
2227 : int rc;
2228 :
2229 43 : rc = bdev_nvme_create_qpair(ctrlr_ch->qpair);
2230 43 : if (rc == 0) {
2231 43 : ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
2232 : ctrlr_ch, 0);
2233 :
2234 : /* The current full reset sequence will move to the next
2235 : * ctrlr_channel after the qpair is actually connected.
2236 : */
2237 43 : assert(ctrlr_ch->reset_iter == NULL);
2238 43 : ctrlr_ch->reset_iter = i;
2239 : } else {
2240 0 : spdk_for_each_channel_continue(i, rc);
2241 : }
2242 43 : }
2243 :
2244 : static void
2245 27 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
2246 : {
2247 27 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
2248 : struct nvme_ns *nvme_ns;
2249 :
2250 27 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
2251 39 : nvme_ns != NULL;
2252 12 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
2253 12 : if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
2254 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
2255 : /* NS can be added again. Just nullify nvme_ns->ns. */
2256 1 : nvme_ns->ns = NULL;
2257 : }
2258 : }
2259 27 : }
2260 :
2261 :
2262 : static int
2263 49 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
2264 : {
2265 49 : struct nvme_ctrlr *nvme_ctrlr = arg;
2266 49 : int rc = -ETIMEDOUT;
2267 :
2268 49 : if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
2269 : /* Mark the ctrlr as failed. The next call to
2270 : * spdk_nvme_ctrlr_reconnect_poll_async() will then
2271 : * do the necessary cleanup and return failure.
2272 : */
2273 2 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
2274 : }
2275 :
2276 49 : rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
2277 49 : if (rc == -EAGAIN) {
2278 0 : return SPDK_POLLER_BUSY;
2279 : }
2280 :
2281 49 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
2282 49 : if (rc == 0) {
2283 27 : nvme_ctrlr_check_namespaces(nvme_ctrlr);
2284 :
2285 : /* Recreate all of the I/O queue pairs */
2286 27 : spdk_for_each_channel(nvme_ctrlr,
2287 : bdev_nvme_reset_create_qpair,
2288 : NULL,
2289 : bdev_nvme_reset_create_qpairs_done);
2290 : } else {
2291 22 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2292 : }
2293 49 : return SPDK_POLLER_BUSY;
2294 : }
2295 :
/* Start an asynchronous controller reconnect and register a poller to drive
 * it to completion.
 */
static void
bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
{
	spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);

	SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
	assert(nvme_ctrlr->reset_detach_poller == NULL);
	nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
					  nvme_ctrlr, 0);
}
2306 :
2307 : static void
2308 36 : bdev_nvme_reset_destroy_qpair_done(struct spdk_io_channel_iter *i, int status)
2309 : {
2310 36 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2311 :
2312 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
2313 36 : assert(status == 0);
2314 :
2315 36 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2316 0 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2317 : } else {
2318 36 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2319 : }
2320 36 : }
2321 :
/* Disconnect the I/O qpair on every channel; continue the reset sequence in
 * bdev_nvme_reset_destroy_qpair_done() when the sweep finishes.
 */
static void
bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
{
	spdk_for_each_channel(nvme_ctrlr,
			      bdev_nvme_reset_destroy_qpair,
			      NULL,
			      bdev_nvme_reset_destroy_qpair_done);
}
2330 :
/* Cancel a pending delayed reconnect and reconnect immediately. Runs on the
 * ctrlr thread with resetting already set by the caller.
 */
static void
bdev_nvme_reconnect_ctrlr_now(void *ctx)
{
	struct nvme_ctrlr *nvme_ctrlr = ctx;

	assert(nvme_ctrlr->resetting == true);
	assert(nvme_ctrlr->thread == spdk_get_thread());

	spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);

	/* Adminq polling was paused while the reconnect was delayed; resume it. */
	spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);

	bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
}
2345 :
2346 : static void
2347 36 : _bdev_nvme_reset_ctrlr(void *ctx)
2348 : {
2349 36 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2350 :
2351 36 : assert(nvme_ctrlr->resetting == true);
2352 36 : assert(nvme_ctrlr->thread == spdk_get_thread());
2353 :
2354 36 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2355 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
2356 : } else {
2357 36 : bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
2358 : }
2359 36 : }
2360 :
/* Start a full reset of the controller.
 *
 * Returns 0 if the reset was started, -ENXIO if the controller is being
 * destructed, -EBUSY if a reset is already in progress, or -EALREADY if the
 * controller is disabled. If a delayed reconnect is pending, it is promoted
 * to an immediate reconnect instead of a full disconnect/reconnect cycle.
 */
static int
bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
{
	spdk_msg_fn msg_fn;

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (nvme_ctrlr->destruct) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return -ENXIO;
	}

	if (nvme_ctrlr->resetting) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
		return -EBUSY;
	}

	if (nvme_ctrlr->disabled) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		SPDK_NOTICELOG("Unable to perform reset. Controller is disabled.\n");
		return -EALREADY;
	}

	nvme_ctrlr->resetting = true;
	nvme_ctrlr->dont_retry = true;

	if (nvme_ctrlr->reconnect_is_delayed) {
		/* A delayed reconnect is pending; reconnect now instead of a full reset. */
		SPDK_DEBUGLOG(bdev_nvme, "Reconnect is already scheduled.\n");
		msg_fn = bdev_nvme_reconnect_ctrlr_now;
		nvme_ctrlr->reconnect_is_delayed = false;
	} else {
		msg_fn = _bdev_nvme_reset_ctrlr;
		assert(nvme_ctrlr->reset_start_tsc == 0);
	}

	nvme_ctrlr->reset_start_tsc = spdk_get_ticks();

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* The actual reset work must run on the thread owning the ctrlr. */
	spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
	return 0;
}
2403 :
/* Re-enable a previously disabled controller and reconnect it immediately.
 * Returns 0 on success, -ENXIO if destructing, -EBUSY if resetting, or
 * -EALREADY if the controller is not disabled.
 */
static int
bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
{
	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (nvme_ctrlr->destruct) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return -ENXIO;
	}

	if (nvme_ctrlr->resetting) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return -EBUSY;
	}

	if (!nvme_ctrlr->disabled) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return -EALREADY;
	}

	nvme_ctrlr->disabled = false;
	nvme_ctrlr->resetting = true;

	nvme_ctrlr->reset_start_tsc = spdk_get_ticks();

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* Reconnect on the ctrlr thread; the enable completes via the reset path. */
	spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
	return 0;
}
2433 :
/* Final stage of disabling a controller, run on the ctrlr thread after every
 * channel has flushed its pending resets. Marks the controller disabled,
 * pauses adminq polling, and invokes the saved ctrlr-op callback.
 */
static void
_bdev_nvme_disable_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
	bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
	void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
	enum bdev_nvme_op_after_reset op_after_disable;

	assert(nvme_ctrlr->thread == spdk_get_thread());

	nvme_ctrlr->ctrlr_op_cb_fn = NULL;
	nvme_ctrlr->ctrlr_op_cb_arg = NULL;

	pthread_mutex_lock(&nvme_ctrlr->mutex);

	nvme_ctrlr->resetting = false;
	nvme_ctrlr->dont_retry = false;

	/* Disable is treated as a successful "reset" here; only
	 * OP_COMPLETE_PENDING_DESTRUCT is acted on below.
	 */
	op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);

	nvme_ctrlr->disabled = true;
	spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	if (ctrlr_op_cb_fn) {
		ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
	}

	switch (op_after_disable) {
	case OP_COMPLETE_PENDING_DESTRUCT:
		nvme_ctrlr_unregister(nvme_ctrlr);
		break;
	default:
		break;
	}

}
2472 :
/* Flush pending resets on every channel, then finish disabling the
 * controller in _bdev_nvme_disable_ctrlr_complete().
 */
static void
bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
{
	/* Make sure we clear any pending resets before returning. */
	spdk_for_each_channel(nvme_ctrlr,
			      bdev_nvme_complete_pending_resets,
			      NULL,
			      _bdev_nvme_disable_ctrlr_complete);
}
2482 :
2483 : static void
2484 1 : bdev_nvme_disable_destroy_qpairs_done(struct spdk_io_channel_iter *i, int status)
2485 : {
2486 1 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2487 :
2488 1 : assert(status == 0);
2489 :
2490 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2491 0 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2492 : } else {
2493 1 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
2494 : }
2495 1 : }
2496 :
/* Disconnect the I/O qpair on every channel as part of disabling the ctrlr;
 * continues in bdev_nvme_disable_destroy_qpairs_done().
 */
static void
bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
{
	spdk_for_each_channel(nvme_ctrlr,
			      bdev_nvme_reset_destroy_qpair,
			      NULL,
			      bdev_nvme_disable_destroy_qpairs_done);
}
2505 :
/* Disable path taken when a delayed reconnect was scheduled: cancel the
 * reconnect timer and complete the disable directly.
 */
static void
_bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
{
	struct nvme_ctrlr *nvme_ctrlr = ctx;

	assert(nvme_ctrlr->resetting == true);
	assert(nvme_ctrlr->thread == spdk_get_thread());

	spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);

	bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
}
2518 :
2519 : static void
2520 1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
2521 : {
2522 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2523 :
2524 1 : assert(nvme_ctrlr->resetting == true);
2525 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2526 :
2527 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2528 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
2529 : } else {
2530 1 : bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
2531 : }
2532 1 : }
2533 :
/* Disable the controller. Returns 0 if the disable was started, -ENXIO if
 * destructing, -EBUSY if resetting, or -EALREADY if already disabled.
 */
static int
bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
{
	spdk_msg_fn msg_fn;

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (nvme_ctrlr->destruct) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return -ENXIO;
	}

	if (nvme_ctrlr->resetting) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return -EBUSY;
	}

	if (nvme_ctrlr->disabled) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return -EALREADY;
	}

	nvme_ctrlr->resetting = true;
	nvme_ctrlr->dont_retry = true;

	if (nvme_ctrlr->reconnect_is_delayed) {
		/* No connection to tear down; just cancel the pending reconnect. */
		msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
		nvme_ctrlr->reconnect_is_delayed = false;
	} else {
		msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
	}

	nvme_ctrlr->reset_start_tsc = spdk_get_ticks();

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* The actual disable work must run on the thread owning the ctrlr. */
	spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
	return 0;
}
2572 :
/* Dispatch a controller operation (reset/enable/disable). If the operation
 * started successfully (rc == 0), cb_fn is registered to run when it
 * completes; for any non-zero rc the callback is NOT registered and the
 * caller must report the error itself.
 */
static int
nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
	      bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
{
	int rc;

	switch (op) {
	case NVME_CTRLR_OP_RESET:
		rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
		break;
	case NVME_CTRLR_OP_ENABLE:
		rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
		break;
	case NVME_CTRLR_OP_DISABLE:
		rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
		break;
	default:
		rc = -EINVAL;
		break;
	}

	if (rc == 0) {
		/* Only one ctrlr op may be outstanding at a time. */
		assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
		assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
		nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
		nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
	}
	return rc;
}
2602 :
/* Context for running a controller operation from an RPC; remembers the
 * originating thread so the user callback is invoked there.
 */
struct nvme_ctrlr_op_rpc_ctx {
	/* Ctrlr the op was last dispatched to (used when iterating a bdev ctrlr). */
	struct nvme_ctrlr *nvme_ctrlr;
	/* Thread that issued the RPC; cb_fn runs on it. */
	struct spdk_thread *orig_thread;
	/* Operation to perform on each ctrlr. */
	enum nvme_ctrlr_op op;
	/* Accumulated result reported to cb_fn. */
	int rc;
	bdev_nvme_ctrlr_op_cb cb_fn;
	void *cb_arg;
};
2611 :
2612 : static void
2613 4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
2614 : {
2615 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2616 :
2617 4 : assert(ctx != NULL);
2618 4 : assert(ctx->cb_fn != NULL);
2619 :
2620 4 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2621 :
2622 4 : free(ctx);
2623 4 : }
2624 :
/* Completion callback for nvme_ctrlr_op(): record the result and bounce back
 * to the RPC's original thread to invoke the user callback.
 */
static void
nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
{
	struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;

	ctx->rc = rc;

	spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
}
2634 :
/* Execute a controller operation on behalf of an RPC. cb_fn is always invoked
 * exactly once, on the calling thread, with the operation's result.
 * -EALREADY (the op is a no-op in the ctrlr's current state) is reported as
 * success.
 */
void
nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
		  bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
{
	struct nvme_ctrlr_op_rpc_ctx *ctx;
	int rc;

	assert(cb_fn != NULL);

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->orig_thread = spdk_get_thread();
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

	rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
	if (rc == 0) {
		/* The op started; its completion will invoke the callback. */
		return;
	} else if (rc == -EALREADY) {
		rc = 0;
	}

	/* The op did not start; complete the RPC synchronously via the same path. */
	nvme_ctrlr_op_rpc_complete(ctx, rc);
}
2664 :
2665 : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
2666 :
/* Continue an RPC-driven operation across the nvme_ctrlrs of a bdev ctrlr:
 * after one ctrlr finishes, dispatch the same op to the next, or finish and
 * report the accumulated result. Runs on the RPC's original thread.
 */
static void
_nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
{
	struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
	struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
	int rc;

	prev_nvme_ctrlr = ctx->nvme_ctrlr;
	ctx->nvme_ctrlr = NULL;

	if (ctx->rc != 0) {
		/* The previous ctrlr failed; stop iterating and report the error. */
		goto complete;
	}

	next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
	if (next_nvme_ctrlr == NULL) {
		/* All ctrlrs processed. */
		goto complete;
	}

	rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
	if (rc == 0) {
		ctx->nvme_ctrlr = next_nvme_ctrlr;
		return;
	} else if (rc == -EALREADY) {
		/* Op is a no-op in this ctrlr's current state; fall through and
		 * complete with success.
		 */
		ctx->nvme_ctrlr = next_nvme_ctrlr;
		rc = 0;
	}

	ctx->rc = rc;

complete:
	ctx->cb_fn(ctx->cb_arg, ctx->rc);
	free(ctx);
}
2701 :
2702 : static void
2703 2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
2704 : {
2705 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2706 :
2707 2 : ctx->rc = rc;
2708 :
2709 2 : spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
2710 2 : }
2711 :
/* Apply a controller operation to the nvme_ctrlrs of a bdev controller,
 * sequentially starting with the first. cb_fn is invoked once at the end, on
 * the calling thread. A ctrlr returning -EALREADY is treated as success and
 * iteration continues via the continue path.
 */
void
nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
		       bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
{
	struct nvme_ctrlr_op_rpc_ctx *ctx;
	struct nvme_ctrlr *nvme_ctrlr;
	int rc;

	assert(cb_fn != NULL);

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->orig_thread = spdk_get_thread();
	ctx->op = op;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

	nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
	assert(nvme_ctrlr != NULL);

	rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
	if (rc == 0) {
		/* The op started; its completion continues the iteration. */
		ctx->nvme_ctrlr = nvme_ctrlr;
		return;
	} else if (rc == -EALREADY) {
		/* No-op for this ctrlr; continue the iteration with success. */
		ctx->nvme_ctrlr = nvme_ctrlr;
		rc = 0;
	}

	nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
}
2748 :
2749 : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
2750 :
2751 : static void
2752 7 : _bdev_nvme_reset_io_complete(struct spdk_io_channel_iter *i, int status)
2753 : {
2754 7 : struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
2755 : enum spdk_bdev_io_status io_status;
2756 :
2757 7 : if (bio->cpl.cdw0 == 0) {
2758 5 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
2759 : } else {
2760 2 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
2761 : }
2762 :
2763 7 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
2764 7 : }
2765 :
/* Per-channel step during reset completion: abort I/Os queued for retry on
 * this bdev channel, then advance the iteration.
 */
static void
bdev_nvme_abort_bdev_channel(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);

	bdev_nvme_abort_retry_ios(nbdev_ch);
	spdk_for_each_channel_continue(i, 0);
}
2776 :
/* Finish a reset bdev_io: first abort the retry queue on every bdev channel,
 * then complete the bdev_io (in _bdev_nvme_reset_io_complete) with the status
 * recorded in bio->cpl.
 */
static void
bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;

	/* Abort all queued I/Os for retry. */
	spdk_for_each_channel(nbdev,
			      bdev_nvme_abort_bdev_channel,
			      bio,
			      _bdev_nvme_reset_io_complete);
}
2789 :
/* Continue a reset bdev_io with the next I/O path, or complete it if the
 * previous step failed or there is no next path. Runs on the thread owning
 * the bdev_io.
 */
static void
_bdev_nvme_reset_io_continue(void *ctx)
{
	struct nvme_bdev_io *bio = ctx;
	struct nvme_io_path *prev_io_path, *next_io_path;
	int rc;

	prev_io_path = bio->io_path;
	bio->io_path = NULL;

	if (bio->cpl.cdw0 != 0) {
		/* The previous reset step failed (cdw0 != 0 encodes failure). */
		goto complete;
	}

	next_io_path = STAILQ_NEXT(prev_io_path, stailq);
	if (next_io_path == NULL) {
		/* All paths have been reset. */
		goto complete;
	}

	rc = _bdev_nvme_reset_io(next_io_path, bio);
	if (rc == 0) {
		return;
	}

	/* Failed to start the next reset; mark the whole reset I/O as failed. */
	bio->cpl.cdw0 = 1;

complete:
	bdev_nvme_reset_io_complete(bio);
}
2819 :
2820 : static void
2821 10 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
2822 : {
2823 10 : struct nvme_bdev_io *bio = cb_arg;
2824 10 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
2825 :
2826 10 : bio->cpl.cdw0 = (rc == 0) ? 0 : 1;
2827 :
2828 10 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
2829 10 : }
2830 :
/* Reset the nvme_ctrlr behind one I/O path. If that ctrlr is already
 * resetting (-EBUSY), queue this reset bdev_io on the ctrlr channel so it
 * completes together with the ongoing reset. Returns 0 when the reset was
 * started or queued, otherwise the error from nvme_ctrlr_op().
 */
static int
_bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
{
	struct nvme_ctrlr_channel *ctrlr_ch;
	struct spdk_bdev_io *bdev_io;
	int rc;

	rc = nvme_ctrlr_op(io_path->qpair->ctrlr, NVME_CTRLR_OP_RESET,
			   bdev_nvme_reset_io_continue, bio);
	if (rc != 0 && rc != -EBUSY) {
		return rc;
	}

	assert(bio->io_path == NULL);
	bio->io_path = io_path;

	if (rc == -EBUSY) {
		ctrlr_ch = io_path->qpair->ctrlr_ch;
		assert(ctrlr_ch != NULL);
		/*
		 * Reset call is queued only if it is from the app framework. This is on purpose so that
		 * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
		 * upper level. If they are in the middle of a reset, we won't try to schedule another one.
		 */
		bdev_io = spdk_bdev_io_from_ctx(bio);
		TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bdev_io, module_link);
	}

	return 0;
}
2861 :
/* Entry point for a reset bdev_io: reset the nvme_ctrlrs of the bdev
 * controller sequentially, starting from the first I/O path.
 */
static void
bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio)
{
	struct nvme_io_path *io_path;
	int rc;

	/* cdw0 == 0 marks success so far. */
	bio->cpl.cdw0 = 0;

	/* Reset all nvme_ctrlrs of a bdev controller sequentially. */
	io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
	assert(io_path != NULL);

	rc = _bdev_nvme_reset_io(io_path, bio);
	if (rc != 0) {
		/* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
		rc = (rc == -EALREADY) ? 0 : rc;

		bdev_nvme_reset_io_continue(bio, rc);
	}
}
2882 :
/* Prepare a failover to the next trid. Must be called with nvme_ctrlr->mutex
 * held ("unsafe"). Returns 0 if the failover state was set up (caller starts
 * the reset), -ENXIO if destructing, -EINPROGRESS if deferred until the
 * ongoing reset completes, -EBUSY if a failover is already running, or
 * -EALREADY if a scheduled reconnect/enable will pick up the new trid.
 */
static int
bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
{
	if (nvme_ctrlr->destruct) {
		/* Don't bother resetting if the controller is in the process of being destructed. */
		return -ENXIO;
	}

	if (nvme_ctrlr->resetting) {
		if (!nvme_ctrlr->in_failover) {
			SPDK_NOTICELOG("Reset is already in progress. Defer failover until reset completes.\n");

			/* Defer failover until reset completes. */
			nvme_ctrlr->pending_failover = true;
			return -EINPROGRESS;
		} else {
			SPDK_NOTICELOG("Unable to perform failover, already in progress.\n");
			return -EBUSY;
		}
	}

	/* Rotate to the next trid (optionally removing the current one). */
	bdev_nvme_failover_trid(nvme_ctrlr, remove, true);

	if (nvme_ctrlr->reconnect_is_delayed) {
		SPDK_NOTICELOG("Reconnect is already scheduled.\n");

		/* We rely on the next reconnect for the failover. */
		return -EALREADY;
	}

	if (nvme_ctrlr->disabled) {
		SPDK_NOTICELOG("Controller is disabled.\n");

		/* We rely on the enablement for the failover. */
		return -EALREADY;
	}

	nvme_ctrlr->resetting = true;
	nvme_ctrlr->in_failover = true;

	assert(nvme_ctrlr->reset_start_tsc == 0);
	nvme_ctrlr->reset_start_tsc = spdk_get_ticks();

	return 0;
}
2928 :
2929 : static int
2930 16 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2931 : {
2932 : int rc;
2933 :
2934 16 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2935 16 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
2936 16 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2937 :
2938 16 : if (rc == 0) {
2939 11 : spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
2940 5 : } else if (rc == -EALREADY) {
2941 0 : rc = 0;
2942 : }
2943 :
2944 16 : return rc;
2945 : }
2946 :
2947 : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
2948 : uint64_t num_blocks);
2949 :
2950 : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
2951 : uint64_t num_blocks);
2952 :
2953 : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
2954 : uint64_t src_offset_blocks,
2955 : uint64_t num_blocks);
2956 :
2957 : static void
2958 1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
2959 : bool success)
2960 : {
2961 1 : struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
2962 : int ret;
2963 :
2964 1 : if (!success) {
2965 0 : ret = -EINVAL;
2966 0 : goto exit;
2967 : }
2968 :
2969 1 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
2970 0 : ret = -ENXIO;
2971 0 : goto exit;
2972 : }
2973 :
2974 1 : ret = bdev_nvme_readv(bio,
2975 : bdev_io->u.bdev.iovs,
2976 : bdev_io->u.bdev.iovcnt,
2977 : bdev_io->u.bdev.md_buf,
2978 : bdev_io->u.bdev.num_blocks,
2979 : bdev_io->u.bdev.offset_blocks,
2980 : bdev_io->u.bdev.dif_check_flags,
2981 : bdev_io->u.bdev.memory_domain,
2982 : bdev_io->u.bdev.memory_domain_ctx,
2983 : bdev_io->u.bdev.accel_sequence);
2984 :
2985 1 : exit:
2986 1 : if (spdk_unlikely(ret != 0)) {
2987 0 : bdev_nvme_io_complete(bio, ret);
2988 : }
2989 1 : }
2990 :
/* Dispatch a bdev I/O to the matching NVMe operation. The I/O path has
 * already been resolved by the caller, except for the types that select
 * their own path (RESET, NVME_ADMIN, ABORT), whose io_path is cleared here.
 *
 * Fully asynchronous types (RESET, FLUSH, NVME_ADMIN, ABORT) complete
 * themselves and return directly; every other case sets rc and is completed
 * below on a synchronous submission failure.
 */
static inline void
_bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
{
	struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct nvme_bdev_io *nbdev_io_to_abort;
	int rc = 0;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {

			rc = bdev_nvme_readv(nbdev_io,
					     bdev_io->u.bdev.iovs,
					     bdev_io->u.bdev.iovcnt,
					     bdev_io->u.bdev.md_buf,
					     bdev_io->u.bdev.num_blocks,
					     bdev_io->u.bdev.offset_blocks,
					     bdev_io->u.bdev.dif_check_flags,
					     bdev_io->u.bdev.memory_domain,
					     bdev_io->u.bdev.memory_domain_ctx,
					     bdev_io->u.bdev.accel_sequence);
		} else {
			/* No data buffer yet: allocate one and resume in
			 * bdev_nvme_get_buf_cb().
			 */
			spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
					     bdev_io->u.bdev.num_blocks * bdev->blocklen);
			rc = 0;
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		rc = bdev_nvme_writev(nbdev_io,
				      bdev_io->u.bdev.iovs,
				      bdev_io->u.bdev.iovcnt,
				      bdev_io->u.bdev.md_buf,
				      bdev_io->u.bdev.num_blocks,
				      bdev_io->u.bdev.offset_blocks,
				      bdev_io->u.bdev.dif_check_flags,
				      bdev_io->u.bdev.memory_domain,
				      bdev_io->u.bdev.memory_domain_ctx,
				      bdev_io->u.bdev.accel_sequence,
				      bdev_io->u.bdev.nvme_cdw12,
				      bdev_io->u.bdev.nvme_cdw13);
		break;
	case SPDK_BDEV_IO_TYPE_COMPARE:
		rc = bdev_nvme_comparev(nbdev_io,
					bdev_io->u.bdev.iovs,
					bdev_io->u.bdev.iovcnt,
					bdev_io->u.bdev.md_buf,
					bdev_io->u.bdev.num_blocks,
					bdev_io->u.bdev.offset_blocks,
					bdev_io->u.bdev.dif_check_flags);
		break;
	case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
		/* Fused compare-and-write: iovs hold the compare payload,
		 * fused_iovs the write payload.
		 */
		rc = bdev_nvme_comparev_and_writev(nbdev_io,
						   bdev_io->u.bdev.iovs,
						   bdev_io->u.bdev.iovcnt,
						   bdev_io->u.bdev.fused_iovs,
						   bdev_io->u.bdev.fused_iovcnt,
						   bdev_io->u.bdev.md_buf,
						   bdev_io->u.bdev.num_blocks,
						   bdev_io->u.bdev.offset_blocks,
						   bdev_io->u.bdev.dif_check_flags);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		rc = bdev_nvme_unmap(nbdev_io,
				     bdev_io->u.bdev.offset_blocks,
				     bdev_io->u.bdev.num_blocks);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		rc = bdev_nvme_write_zeroes(nbdev_io,
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.num_blocks);
		break;
	case SPDK_BDEV_IO_TYPE_RESET:
		/* Reset selects its own path(s); clear any stale one. */
		nbdev_io->io_path = NULL;
		bdev_nvme_reset_io(nbdev_ch, nbdev_io);
		return;

	case SPDK_BDEV_IO_TYPE_FLUSH:
		/* Completed immediately as a no-op; no NVMe command is issued. */
		bdev_nvme_io_complete(nbdev_io, 0);
		return;

	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		rc = bdev_nvme_zone_appendv(nbdev_io,
					    bdev_io->u.bdev.iovs,
					    bdev_io->u.bdev.iovcnt,
					    bdev_io->u.bdev.md_buf,
					    bdev_io->u.bdev.num_blocks,
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.dif_check_flags);
		break;
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
		rc = bdev_nvme_get_zone_info(nbdev_io,
					     bdev_io->u.zone_mgmt.zone_id,
					     bdev_io->u.zone_mgmt.num_zones,
					     bdev_io->u.zone_mgmt.buf);
		break;
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		rc = bdev_nvme_zone_management(nbdev_io,
					       bdev_io->u.zone_mgmt.zone_id,
					       bdev_io->u.zone_mgmt.zone_action);
		break;
	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
		/* Admin passthrough chooses its own controller/path. */
		nbdev_io->io_path = NULL;
		bdev_nvme_admin_passthru(nbdev_ch,
					 nbdev_io,
					 &bdev_io->u.nvme_passthru.cmd,
					 bdev_io->u.nvme_passthru.buf,
					 bdev_io->u.nvme_passthru.nbytes);
		return;

	case SPDK_BDEV_IO_TYPE_NVME_IO:
		rc = bdev_nvme_io_passthru(nbdev_io,
					   &bdev_io->u.nvme_passthru.cmd,
					   bdev_io->u.nvme_passthru.buf,
					   bdev_io->u.nvme_passthru.nbytes);
		break;
	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		rc = bdev_nvme_io_passthru_md(nbdev_io,
					      &bdev_io->u.nvme_passthru.cmd,
					      bdev_io->u.nvme_passthru.buf,
					      bdev_io->u.nvme_passthru.nbytes,
					      bdev_io->u.nvme_passthru.md_buf,
					      bdev_io->u.nvme_passthru.md_len);
		break;
	case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
		rc = bdev_nvme_iov_passthru_md(nbdev_io,
					       &bdev_io->u.nvme_passthru.cmd,
					       bdev_io->u.nvme_passthru.iovs,
					       bdev_io->u.nvme_passthru.iovcnt,
					       bdev_io->u.nvme_passthru.nbytes,
					       bdev_io->u.nvme_passthru.md_buf,
					       bdev_io->u.nvme_passthru.md_len);
		break;
	case SPDK_BDEV_IO_TYPE_ABORT:
		/* Abort searches all paths for the target I/O. */
		nbdev_io->io_path = NULL;
		nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
		bdev_nvme_abort(nbdev_ch,
				nbdev_io,
				nbdev_io_to_abort);
		return;

	case SPDK_BDEV_IO_TYPE_COPY:
		rc = bdev_nvme_copy(nbdev_io,
				    bdev_io->u.bdev.offset_blocks,
				    bdev_io->u.bdev.copy.src_offset_blocks,
				    bdev_io->u.bdev.num_blocks);
		break;
	default:
		rc = -EINVAL;
		break;
	}

	if (spdk_unlikely(rc != 0)) {
		bdev_nvme_io_complete(nbdev_io, rc);
	}
}
3147 :
3148 : static void
3149 58 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
3150 : {
3151 58 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
3152 58 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3153 :
3154 58 : if (spdk_likely(nbdev_io->submit_tsc == 0)) {
3155 58 : nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
3156 : } else {
3157 : /* There are cases where submit_tsc != 0, i.e. retry I/O.
3158 : * We need to update submit_tsc here.
3159 : */
3160 0 : nbdev_io->submit_tsc = spdk_get_ticks();
3161 : }
3162 :
3163 58 : spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
3164 58 : nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
3165 58 : if (spdk_unlikely(!nbdev_io->io_path)) {
3166 11 : if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
3167 10 : bdev_nvme_io_complete(nbdev_io, -ENXIO);
3168 10 : return;
3169 : }
3170 :
3171 : /* Admin commands do not use the optimal I/O path.
3172 : * Simply fall through even if it is not found.
3173 : */
3174 : }
3175 :
3176 48 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
3177 : }
3178 :
3179 : static bool
3180 0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
3181 : {
3182 0 : struct nvme_bdev *nbdev = ctx;
3183 : struct nvme_ns *nvme_ns;
3184 : struct spdk_nvme_ns *ns;
3185 : struct spdk_nvme_ctrlr *ctrlr;
3186 : const struct spdk_nvme_ctrlr_data *cdata;
3187 :
3188 0 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
3189 0 : assert(nvme_ns != NULL);
3190 0 : ns = nvme_ns->ns;
3191 0 : if (ns == NULL) {
3192 0 : return false;
3193 : }
3194 :
3195 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3196 :
3197 0 : switch (io_type) {
3198 0 : case SPDK_BDEV_IO_TYPE_READ:
3199 : case SPDK_BDEV_IO_TYPE_WRITE:
3200 : case SPDK_BDEV_IO_TYPE_RESET:
3201 : case SPDK_BDEV_IO_TYPE_FLUSH:
3202 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3203 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3204 : case SPDK_BDEV_IO_TYPE_ABORT:
3205 0 : return true;
3206 :
3207 0 : case SPDK_BDEV_IO_TYPE_COMPARE:
3208 0 : return spdk_nvme_ns_supports_compare(ns);
3209 :
3210 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3211 0 : return spdk_nvme_ns_get_md_size(ns) ? true : false;
3212 :
3213 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
3214 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3215 0 : return cdata->oncs.dsm;
3216 :
3217 0 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3218 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3219 0 : return cdata->oncs.write_zeroes;
3220 :
3221 0 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3222 0 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
3223 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
3224 0 : return true;
3225 : }
3226 0 : return false;
3227 :
3228 0 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3229 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3230 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
3231 :
3232 0 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3233 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
3234 0 : spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
3235 :
3236 0 : case SPDK_BDEV_IO_TYPE_COPY:
3237 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3238 0 : return cdata->oncs.copy;
3239 :
3240 0 : default:
3241 0 : return false;
3242 : }
3243 : }
3244 :
/* Allocate and initialize the nvme_qpair backing a new ctrlr channel:
 * attach it to the current thread's poll group, connect the I/O qpair
 * (unless the controller is disabled), and take a reference on the
 * nvme_ctrlr that is dropped again in nvme_qpair_delete().
 *
 * Returns 0 on success, -1 on allocation/channel failure, or the error
 * from bdev_nvme_create_qpair() when the failure cannot be deferred.
 */
static int
nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
{
	struct nvme_qpair *nvme_qpair;
	struct spdk_io_channel *pg_ch;
	int rc;

	nvme_qpair = calloc(1, sizeof(*nvme_qpair));
	if (!nvme_qpair) {
		SPDK_ERRLOG("Failed to alloc nvme_qpair.\n");
		return -1;
	}

	TAILQ_INIT(&nvme_qpair->io_path_list);

	nvme_qpair->ctrlr = nvme_ctrlr;
	nvme_qpair->ctrlr_ch = ctrlr_ch;

	/* The poll group is the per-thread channel of the global ctrlr list
	 * io_device; the reference taken here is released when the qpair is
	 * deleted.
	 */
	pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
	if (!pg_ch) {
		free(nvme_qpair);
		return -1;
	}

	nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);

#ifdef SPDK_CONFIG_VTUNE
	nvme_qpair->group->collect_spin_stat = true;
#else
	nvme_qpair->group->collect_spin_stat = false;
#endif

	if (!nvme_ctrlr->disabled) {
		/* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will
		 * be created when it's enabled.
		 */
		rc = bdev_nvme_create_qpair(nvme_qpair);
		if (rc != 0) {
			/* nvme_ctrlr can't create IO qpair if connection is down.
			 * If reconnect_delay_sec is non-zero, creating IO qpair is retried
			 * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
			 * submitted IO will be queued until IO qpair is successfully created.
			 *
			 * Hence, if both are satisfied, ignore the failure.
			 */
			if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
				spdk_put_io_channel(pg_ch);
				free(nvme_qpair);
				return rc;
			}
		}
	}

	TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);

	ctrlr_ch->qpair = nvme_qpair;

	/* The channel keeps the controller alive until the qpair is deleted. */
	pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
	nvme_qpair->ctrlr->ref++;
	pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);

	return 0;
}
3308 :
3309 : static int
3310 57 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3311 : {
3312 57 : struct nvme_ctrlr *nvme_ctrlr = io_device;
3313 57 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3314 :
3315 57 : TAILQ_INIT(&ctrlr_ch->pending_resets);
3316 :
3317 57 : return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
3318 : }
3319 :
3320 : static void
3321 57 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
3322 : {
3323 : struct nvme_io_path *io_path, *next;
3324 :
3325 57 : assert(nvme_qpair->group != NULL);
3326 :
3327 92 : TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
3328 35 : TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
3329 35 : nvme_io_path_free(io_path);
3330 : }
3331 :
3332 57 : TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3333 :
3334 57 : spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
3335 :
3336 57 : nvme_ctrlr_release(nvme_qpair->ctrlr);
3337 :
3338 57 : free(nvme_qpair);
3339 57 : }
3340 :
/* I/O-device channel destructor for a nvme_ctrlr: tear the channel's qpair
 * down synchronously when its spdk_nvme_qpair is already gone, or hand it
 * off for asynchronous disconnect otherwise.
 */
static void
bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
{
	struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
	struct nvme_qpair *nvme_qpair;

	nvme_qpair = ctrlr_ch->qpair;
	assert(nvme_qpair != NULL);

	/* Invalidate any cached lookups that point at this qpair's I/O paths. */
	_bdev_nvme_clear_io_path_cache(nvme_qpair);

	if (nvme_qpair->qpair != NULL) {
		if (ctrlr_ch->reset_iter == NULL) {
			spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
		} else {
			/* Skip current ctrlr_channel in a full reset sequence because
			 * it is being deleted now. The qpair is already being disconnected.
			 * We do not have to restart disconnecting it.
			 */
			spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
		}

		/* We cannot release a reference to the poll group now.
		 * The qpair may be disconnected asynchronously later.
		 * We need to poll it until it is actually disconnected.
		 * Just detach the qpair from the deleting ctrlr_channel.
		 */
		nvme_qpair->ctrlr_ch = NULL;
	} else {
		assert(ctrlr_ch->reset_iter == NULL);

		/* The spdk_nvme_qpair is already gone, so the wrapper can be
		 * freed synchronously.
		 */
		nvme_qpair_delete(nvme_qpair);
	}
}
3375 :
3376 : static inline struct spdk_io_channel *
3377 0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
3378 : {
3379 0 : if (spdk_unlikely(!group->accel_channel)) {
3380 0 : group->accel_channel = spdk_accel_get_io_channel();
3381 0 : if (!group->accel_channel) {
3382 0 : SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
3383 : group);
3384 0 : return NULL;
3385 : }
3386 : }
3387 :
3388 0 : return group->accel_channel;
3389 : }
3390 :
3391 : static void
3392 0 : bdev_nvme_submit_accel_crc32c(void *ctx, uint32_t *dst, struct iovec *iov,
3393 : uint32_t iov_cnt, uint32_t seed,
3394 : spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
3395 : {
3396 : struct spdk_io_channel *accel_ch;
3397 0 : struct nvme_poll_group *group = ctx;
3398 : int rc;
3399 :
3400 0 : assert(cb_fn != NULL);
3401 :
3402 0 : accel_ch = bdev_nvme_get_accel_channel(group);
3403 0 : if (spdk_unlikely(accel_ch == NULL)) {
3404 0 : cb_fn(cb_arg, -ENOMEM);
3405 0 : return;
3406 : }
3407 :
3408 0 : rc = spdk_accel_submit_crc32cv(accel_ch, dst, iov, iov_cnt, seed, cb_fn, cb_arg);
3409 0 : if (rc) {
3410 : /* For the two cases, spdk_accel_submit_crc32cv does not call the user's cb_fn */
3411 0 : if (rc == -ENOMEM || rc == -EINVAL) {
3412 0 : cb_fn(cb_arg, rc);
3413 : }
3414 0 : SPDK_ERRLOG("Cannot complete the accelerated crc32c operation with iov=%p\n", iov);
3415 : }
3416 : }
3417 :
/* spdk_nvme accel callback: thin wrapper over spdk_accel_sequence_finish(),
 * which executes the queued sequence and invokes cb_fn on completion.
 */
static void
bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
{
	spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
}
3423 :
/* spdk_nvme accel callback: thin wrapper over spdk_accel_sequence_abort(). */
static void
bdev_nvme_abort_sequence(void *seq)
{
	spdk_accel_sequence_abort(seq);
}
3429 :
/* spdk_nvme accel callback: thin wrapper over spdk_accel_sequence_reverse(). */
static void
bdev_nvme_reverse_sequence(void *seq)
{
	spdk_accel_sequence_reverse(seq);
}
3435 :
3436 : static int
3437 0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
3438 : struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
3439 : spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
3440 : {
3441 : struct spdk_io_channel *ch;
3442 0 : struct nvme_poll_group *group = ctx;
3443 :
3444 0 : ch = bdev_nvme_get_accel_channel(group);
3445 0 : if (spdk_unlikely(ch == NULL)) {
3446 0 : return -ENOMEM;
3447 : }
3448 :
3449 0 : return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
3450 : domain, domain_ctx, seed, cb_fn, cb_arg);
3451 : }
3452 :
/* Callback table handed to the NVMe driver (via spdk_nvme_poll_group_create)
 * so that it can offload CRC-32C and accel-sequence operations through
 * SPDK's accel framework.
 */
static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
	.table_size = sizeof(struct spdk_nvme_accel_fn_table),
	.submit_accel_crc32c = bdev_nvme_submit_accel_crc32c,
	.append_crc32c = bdev_nvme_append_crc32c,
	.finish_sequence = bdev_nvme_finish_sequence,
	.reverse_sequence = bdev_nvme_reverse_sequence,
	.abort_sequence = bdev_nvme_abort_sequence,
};
3461 :
3462 : static int
3463 42 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
3464 : {
3465 42 : struct nvme_poll_group *group = ctx_buf;
3466 :
3467 42 : TAILQ_INIT(&group->qpair_list);
3468 :
3469 42 : group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
3470 42 : if (group->group == NULL) {
3471 0 : return -1;
3472 : }
3473 :
3474 42 : group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
3475 :
3476 42 : if (group->poller == NULL) {
3477 0 : spdk_nvme_poll_group_destroy(group->group);
3478 0 : return -1;
3479 : }
3480 :
3481 42 : return 0;
3482 : }
3483 :
3484 : static void
3485 42 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
3486 : {
3487 42 : struct nvme_poll_group *group = ctx_buf;
3488 :
3489 42 : assert(TAILQ_EMPTY(&group->qpair_list));
3490 :
3491 42 : if (group->accel_channel) {
3492 0 : spdk_put_io_channel(group->accel_channel);
3493 : }
3494 :
3495 42 : spdk_poller_unregister(&group->poller);
3496 42 : if (spdk_nvme_poll_group_destroy(group->group)) {
3497 0 : SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
3498 0 : assert(false);
3499 : }
3500 42 : }
3501 :
/* bdev fn_table hook: the per-thread channel of a nvme_bdev is the
 * spdk_io_channel registered with the nvme_bdev itself as the io_device.
 */
static struct spdk_io_channel *
bdev_nvme_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(ctx);
}
3509 :
3510 : static void *
3511 0 : bdev_nvme_get_module_ctx(void *ctx)
3512 : {
3513 0 : struct nvme_bdev *nvme_bdev = ctx;
3514 : struct nvme_ns *nvme_ns;
3515 :
3516 0 : if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
3517 0 : return NULL;
3518 : }
3519 :
3520 0 : nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
3521 0 : if (!nvme_ns) {
3522 0 : return NULL;
3523 : }
3524 :
3525 0 : return nvme_ns->ns;
3526 : }
3527 :
/* Map an ANA state to the string used in JSON dumps; NULL for unknown
 * states (callers must tolerate NULL or guarantee a known state).
 */
static const char *
_nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
{
	switch (ana_state) {
	case SPDK_NVME_ANA_OPTIMIZED_STATE:
		return "optimized";
	case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
		return "non_optimized";
	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
		return "inaccessible";
	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
		return "persistent_loss";
	case SPDK_NVME_ANA_CHANGE_STATE:
		return "change";
	default:
		return NULL;
	}
}
3546 :
/* bdev fn_table hook: collect the memory domains of every controller backing
 * this nvme_bdev. At most array_size entries are stored into domains, but
 * the return value is the TOTAL number of domains found, which may exceed
 * array_size (matching the spdk_bdev_get_memory_domains() convention —
 * confirm against the bdev layer documentation).
 */
static int
bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
{
	struct spdk_memory_domain **_domains = NULL;
	struct nvme_bdev *nbdev = ctx;
	struct nvme_ns *nvme_ns;
	int i = 0, _array_size = array_size;
	int rc = 0;

	TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
		/* i is the running total; only pass a destination while it still
		 * points into (or just past) the caller's array. Once the array
		 * is full, _array_size is 0, so the callee stores nothing.
		 */
		if (domains && array_size >= i) {
			_domains = &domains[i];
		} else {
			_domains = NULL;
		}
		rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
		if (rc > 0) {
			i += rc;
			/* Shrink the remaining capacity, clamping at zero since one
			 * controller may report more domains than the space left.
			 */
			if (_array_size >= rc) {
				_array_size -= rc;
			} else {
				_array_size = 0;
			}
		} else if (rc < 0) {
			return rc;
		}
	}

	/* Total domains found, not the number actually written. */
	return i;
}
3577 :
3578 : static const char *
3579 0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
3580 : {
3581 0 : if (nvme_ctrlr->destruct) {
3582 0 : return "deleting";
3583 0 : } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
3584 0 : return "failed";
3585 0 : } else if (nvme_ctrlr->resetting) {
3586 0 : return "resetting";
3587 0 : } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
3588 0 : return "reconnect_is_delayed";
3589 0 : } else if (nvme_ctrlr->disabled) {
3590 0 : return "disabled";
3591 : } else {
3592 0 : return "enabled";
3593 : }
3594 : }
3595 :
/* Write a JSON object describing nvme_ctrlr: state, active trid, any
 * alternate (failover) trids, cntlid, and host identity. Includes the CUSE
 * device name when CUSE support is compiled in and enabled for this ctrlr.
 */
void
nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
{
	struct spdk_nvme_transport_id *trid;
	const struct spdk_nvme_ctrlr_opts *opts;
	const struct spdk_nvme_ctrlr_data *cdata;
	struct nvme_path_id *path_id;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));

#ifdef SPDK_CONFIG_NVME_CUSE
	size_t cuse_name_size = 128;
	char cuse_name[cuse_name_size];

	int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
	if (rc == 0) {
		spdk_json_write_named_string(w, "cuse_device", cuse_name);
	}
#endif
	trid = &nvme_ctrlr->active_path_id->trid;
	spdk_json_write_named_object_begin(w, "trid");
	nvme_bdev_dump_trid_json(trid, w);
	spdk_json_write_object_end(w);

	/* Entries after active_path_id on the path list are alternate
	 * (failover) paths.
	 */
	path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
	if (path_id != NULL) {
		spdk_json_write_named_array_begin(w, "alternate_trids");
		do {
			trid = &path_id->trid;
			spdk_json_write_object_begin(w);
			nvme_bdev_dump_trid_json(trid, w);
			spdk_json_write_object_end(w);

			path_id = TAILQ_NEXT(path_id, link);
		} while (path_id != NULL);
		spdk_json_write_array_end(w);
	}

	cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
	spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);

	opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
	spdk_json_write_named_object_begin(w, "host");
	spdk_json_write_named_string(w, "nqn", opts->hostnqn);
	spdk_json_write_named_string(w, "addr", opts->src_addr);
	spdk_json_write_named_string(w, "svcid", opts->src_svcid);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
3648 :
/* Write one JSON object describing a namespace and the controller behind it:
 * transport id, controller identify data, NVMe version, namespace data
 * (including ANA state when ANA reporting is on), and Opal security info.
 * No output is produced while the namespace is not populated (ns == NULL).
 */
static void
nvme_namespace_info_json(struct spdk_json_write_ctx *w,
			 struct nvme_ns *nvme_ns)
{
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_ctrlr *ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;
	const struct spdk_nvme_transport_id *trid;
	union spdk_nvme_vs_register vs;
	const struct spdk_nvme_ns_data *nsdata;
	char buf[128];

	ns = nvme_ns->ns;
	if (ns == NULL) {
		return;
	}

	ctrlr = spdk_nvme_ns_get_ctrlr(ns);

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);
	trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
	vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);

	spdk_json_write_object_begin(w);

	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		spdk_json_write_named_string(w, "pci_address", trid->traddr);
	}

	spdk_json_write_named_object_begin(w, "trid");

	nvme_bdev_dump_trid_json(trid, w);

	spdk_json_write_object_end(w);

#ifdef SPDK_CONFIG_NVME_CUSE
	size_t cuse_name_size = 128;
	char cuse_name[cuse_name_size];

	int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
					    cuse_name, &cuse_name_size);
	if (rc == 0) {
		spdk_json_write_named_string(w, "cuse_device", cuse_name);
	}
#endif

	spdk_json_write_named_object_begin(w, "ctrlr_data");

	spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);

	spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);

	/* mn/sn/fr are fixed-size identify fields (typically space-padded, not
	 * NUL-terminated): bound the copy to the field size and trim padding.
	 */
	snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "model_number", buf);

	snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "serial_number", buf);

	snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "firmware_revision", buf);

	if (cdata->subnqn[0] != '\0') {
		spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
	}

	spdk_json_write_named_object_begin(w, "oacs");

	spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
	spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
	spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
	spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);

	spdk_json_write_object_end(w);

	spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
	spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "vs");

	spdk_json_write_name(w, "nvme_version");
	/* Omit the tertiary version component when it is zero. */
	if (vs.bits.ter) {
		spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
	} else {
		spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
	}

	spdk_json_write_object_end(w);

	nsdata = spdk_nvme_ns_get_data(ns);

	spdk_json_write_named_object_begin(w, "ns_data");

	spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));

	if (cdata->cmic.ana_reporting) {
		spdk_json_write_named_string(w, "ana_state",
					     _nvme_ana_state_str(nvme_ns->ana_state));
	}

	spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);

	spdk_json_write_object_end(w);

	if (cdata->oacs.security) {
		spdk_json_write_named_object_begin(w, "security");

		spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);

		spdk_json_write_object_end(w);
	}

	spdk_json_write_object_end(w);
}
3767 :
3768 : static const char *
3769 0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
3770 : {
3771 0 : switch (nbdev->mp_policy) {
3772 0 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
3773 0 : return "active_passive";
3774 0 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
3775 0 : return "active_active";
3776 0 : default:
3777 0 : assert(false);
3778 : return "invalid";
3779 : }
3780 : }
3781 :
3782 : static const char *
3783 0 : nvme_bdev_get_mp_selector_str(struct nvme_bdev *nbdev)
3784 : {
3785 0 : switch (nbdev->mp_selector) {
3786 0 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
3787 0 : return "round_robin";
3788 0 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
3789 0 : return "queue_depth";
3790 0 : default:
3791 0 : assert(false);
3792 : return "invalid";
3793 : }
3794 : }
3795 :
/* bdev fn_table hook: dump per-bdev runtime info into the currently open
 * JSON object — one "nvme" array entry per namespace, plus the multipath
 * policy (and selector/rr_min_io when relevant). nvme_bdev->mutex guards
 * the namespace list against concurrent add/remove.
 */
static int
bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct nvme_bdev *nvme_bdev = ctx;
	struct nvme_ns *nvme_ns;

	pthread_mutex_lock(&nvme_bdev->mutex);
	spdk_json_write_named_array_begin(w, "nvme");
	TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
		nvme_namespace_info_json(w, nvme_ns);
	}
	spdk_json_write_array_end(w);
	spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
	if (nvme_bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
		spdk_json_write_named_string(w, "selector", nvme_bdev_get_mp_selector_str(nvme_bdev));
		/* rr_min_io only applies to the round-robin selector. */
		if (nvme_bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
			spdk_json_write_named_uint32(w, "rr_min_io", nvme_bdev->rr_min_io);
		}
	}
	pthread_mutex_unlock(&nvme_bdev->mutex);

	return 0;
}
3819 :
/* bdev fn_table hook for per-bdev JSON configuration. Intentionally empty:
 * nvme bdevs are recreated from controller-level configuration (presumably
 * emitted by bdev_nvme_config_json(), declared at the top of this file —
 * confirm).
 */
static void
bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* No config per bdev needed */
}
3825 :
/* bdev fn_table hook (spin-stat / VTune support): sum and reset the
 * accumulated spin ticks of every poll group serving this channel's I/O
 * paths, returning the total converted from ticks to microseconds.
 * Counters are consumed (zeroed) as they are read.
 */
static uint64_t
bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
{
	struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
	struct nvme_io_path *io_path;
	struct nvme_poll_group *group;
	uint64_t spin_time = 0;

	STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
		group = io_path->qpair->group;

		if (!group || !group->collect_spin_stat) {
			continue;
		}

		/* Fold a still-open spin interval into the running counter. */
		if (group->end_ticks != 0) {
			group->spin_ticks += (group->end_ticks - group->start_ticks);
			group->end_ticks = 0;
		}

		spin_time += group->spin_ticks;
		group->start_ticks = 0;
		group->spin_ticks = 0;
	}

	return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
}
3853 :
3854 : static void
3855 0 : bdev_nvme_reset_device_stat(void *ctx)
3856 : {
3857 0 : struct nvme_bdev *nbdev = ctx;
3858 :
3859 0 : if (nbdev->err_stat != NULL) {
3860 0 : memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
3861 : }
3862 0 : }
3863 :
/* Convert an NVMe status string into a JSON-friendly key: lowercase, with
 * " - ", "-" and " " all replaced by underscores.
 *
 * NOTE(review): dst is assumed to point at a buffer of at least 256 bytes
 * (callers pass char[256]); there is no size parameter to enforce this.
 */
static void
bdev_nvme_format_nvme_status(char *dst, const char *src)
{
	char tmp[256];

	spdk_strcpy_replace(dst, 256, src, " - ", "_");
	spdk_strcpy_replace(tmp, 256, dst, "-", "_");
	spdk_strcpy_replace(dst, 256, tmp, " ", "_");
	spdk_strlwr(dst);
}
3875 :
/* bdev fn_table hook: dump the per-bdev NVMe error counters as a
 * "nvme_error" JSON object, grouped by status type and by (type, code)
 * pair. Only non-zero counters are emitted. No-op when error-stat
 * tracking is disabled for this bdev.
 */
static void
bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct nvme_bdev *nbdev = ctx;
	struct spdk_nvme_status status = {};
	uint16_t sct, sc;
	char status_json[256];
	const char *status_str;

	if (nbdev->err_stat == NULL) {
		return;
	}

	spdk_json_write_named_object_begin(w, "nvme_error");

	spdk_json_write_named_object_begin(w, "status_type");
	for (sct = 0; sct < 8; sct++) {
		if (nbdev->err_stat->status_type[sct] == 0) {
			continue;
		}
		status.sct = sct;

		/* The status struct is used only to look up the display string. */
		status_str = spdk_nvme_cpl_get_status_type_string(&status);
		assert(status_str != NULL);
		bdev_nvme_format_nvme_status(status_json, status_str);

		spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
	}
	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "status_code");
	/* Per-code counters are tracked for status code types 0-3 only. */
	for (sct = 0; sct < 4; sct++) {
		status.sct = sct;
		for (sc = 0; sc < 256; sc++) {
			if (nbdev->err_stat->status[sct][sc] == 0) {
				continue;
			}
			status.sc = sc;

			status_str = spdk_nvme_cpl_get_status_string(&status);
			assert(status_str != NULL);
			bdev_nvme_format_nvme_status(status_json, status_str);

			spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
		}
	}
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
3926 :
3927 : static bool
3928 0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
3929 : {
3930 0 : struct nvme_bdev *nbdev = ctx;
3931 : struct spdk_nvme_ctrlr *ctrlr;
3932 :
3933 0 : if (!g_opts.allow_accel_sequence) {
3934 0 : return false;
3935 : }
3936 :
3937 0 : switch (type) {
3938 0 : case SPDK_BDEV_IO_TYPE_WRITE:
3939 : case SPDK_BDEV_IO_TYPE_READ:
3940 0 : break;
3941 0 : default:
3942 0 : return false;
3943 : }
3944 :
3945 0 : ctrlr = bdev_nvme_get_ctrlr(&nbdev->disk);
3946 0 : assert(ctrlr != NULL);
3947 :
3948 0 : return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
3949 : }
3950 :
/* Dispatch table registered with each NVMe bdev; the generic bdev layer calls
 * back into this module through these entries.
 */
static const struct spdk_bdev_fn_table nvmelib_fn_table = {
	.destruct = bdev_nvme_destruct,
	.submit_request = bdev_nvme_submit_request,
	.io_type_supported = bdev_nvme_io_type_supported,
	.get_io_channel = bdev_nvme_get_io_channel,
	.dump_info_json = bdev_nvme_dump_info_json,
	.write_config_json = bdev_nvme_write_config_json,
	.get_spin_time = bdev_nvme_get_spin_time,
	.get_module_ctx = bdev_nvme_get_module_ctx,
	.get_memory_domains = bdev_nvme_get_memory_domains,
	.accel_sequence_supported = bdev_nvme_accel_sequence_supported,
	.reset_device_stat = bdev_nvme_reset_device_stat,
	.dump_device_stat_json = bdev_nvme_dump_device_stat_json,
};
3965 :
/* Callback invoked for each ANA group descriptor; a non-zero return stops
 * the iteration and is propagated to the caller.
 */
typedef int (*bdev_nvme_parse_ana_log_page_cb)(
	const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);

/* Walk the controller's cached ANA log page and invoke cb_fn for every group
 * descriptor. Each descriptor (header plus its variable-length NSID list) is
 * memcpy'd into the pre-allocated copied_ana_desc buffer before the callback
 * runs, so the callback sees a properly aligned copy rather than the raw,
 * packed log page. Returns -EINVAL if no log page is cached, the callback's
 * non-zero value if it stopped early, or 0 after visiting all descriptors.
 */
static int
bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
			     bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_ana_group_descriptor *copied_desc;
	uint8_t *orig_desc;
	uint32_t i, desc_size, copy_len;
	int rc = 0;

	if (nvme_ctrlr->ana_log_page == NULL) {
		return -EINVAL;
	}

	copied_desc = nvme_ctrlr->copied_ana_desc;

	/* Descriptors start immediately after the fixed-size log page header. */
	orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
	copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);

	for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
		/* copy_len shrinks as we advance; copying the whole remainder is
		 * safe because copied_ana_desc is sized for the full log page.
		 */
		memcpy(copied_desc, orig_desc, copy_len);

		rc = cb_fn(copied_desc, cb_arg);
		if (rc != 0) {
			break;
		}

		/* Descriptors are variable-length: header plus one 32-bit NSID
		 * per namespace in the group.
		 */
		desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
			    copied_desc->num_of_nsid * sizeof(uint32_t);
		orig_desc += desc_size;
		copy_len -= desc_size;
	}

	return rc;
}
4003 :
4004 : static int
4005 5 : nvme_ns_ana_transition_timedout(void *ctx)
4006 : {
4007 5 : struct nvme_ns *nvme_ns = ctx;
4008 :
4009 5 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4010 5 : nvme_ns->ana_transition_timedout = true;
4011 :
4012 5 : return SPDK_POLLER_BUSY;
4013 : }
4014 :
/* Apply an ANA group descriptor to a namespace: record the group id and state,
 * and manage the ANA transition timer. Entering OPTIMIZED/NON_OPTIMIZED clears
 * any pending transition timeout; entering INACCESSIBLE/CHANGE arms a one-shot
 * timer for the controller's advertised ANATT (in seconds) unless one is
 * already running.
 */
static void
_nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
		       const struct spdk_nvme_ana_group_descriptor *desc)
{
	const struct spdk_nvme_ctrlr_data *cdata;

	nvme_ns->ana_group_id = desc->ana_group_id;
	nvme_ns->ana_state = desc->ana_state;
	nvme_ns->ana_state_updating = false;

	switch (nvme_ns->ana_state) {
	case SPDK_NVME_ANA_OPTIMIZED_STATE:
	case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
		/* Namespace became usable again; cancel any transition deadline. */
		nvme_ns->ana_transition_timedout = false;
		spdk_poller_unregister(&nvme_ns->anatt_timer);
		break;

	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
	case SPDK_NVME_ANA_CHANGE_STATE:
		/* Keep the original deadline if a timer is already armed. */
		if (nvme_ns->anatt_timer != NULL) {
			break;
		}

		cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
		nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
				       nvme_ns,
				       cdata->anatt * SPDK_SEC_TO_USEC);
		break;
	default:
		/* PERSISTENT_LOSS and reserved states: nothing to do here. */
		break;
	}
}
4047 :
4048 : static int
4049 59 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
4050 : {
4051 59 : struct nvme_ns *nvme_ns = cb_arg;
4052 : uint32_t i;
4053 :
4054 59 : assert(nvme_ns->ns != NULL);
4055 :
4056 81 : for (i = 0; i < desc->num_of_nsid; i++) {
4057 58 : if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
4058 22 : continue;
4059 : }
4060 :
4061 36 : _nvme_ns_set_ana_state(nvme_ns, desc);
4062 36 : return 1;
4063 : }
4064 :
4065 23 : return 0;
4066 : }
4067 :
4068 : static int
4069 5 : nvme_generate_uuid(const char *sn, uint32_t nsid, struct spdk_uuid *uuid)
4070 : {
4071 5 : int rc = 0;
4072 5 : struct spdk_uuid new_uuid, namespace_uuid;
4073 5 : char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
4074 : /* This namespace UUID was generated using uuid_generate() method. */
4075 5 : const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
4076 : int size;
4077 :
4078 5 : assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
4079 :
4080 5 : spdk_uuid_set_null(&new_uuid);
4081 5 : spdk_uuid_set_null(&namespace_uuid);
4082 :
4083 5 : size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
4084 5 : if (size <= 0 || (unsigned long)size >= sizeof(merged_str)) {
4085 0 : return -EINVAL;
4086 : }
4087 :
4088 5 : spdk_uuid_parse(&namespace_uuid, namespace_str);
4089 :
4090 5 : rc = spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
4091 5 : if (rc == 0) {
4092 5 : memcpy(uuid, &new_uuid, sizeof(struct spdk_uuid));
4093 : }
4094 :
4095 5 : return rc;
4096 : }
4097 :
/* Populate an spdk_bdev with the attributes of an NVMe namespace: product
 * name/CSI specifics, identifiers (NGUID/UUID/generated UUID), geometry,
 * transfer limits, atomic/physical block sizes, metadata and PI settings.
 * Returns 0 on success, -ENOTSUP for an unsupported command set, -ENOMEM if
 * the bdev name cannot be allocated, or a UUID generation error.
 */
static int
nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
		 struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
		 uint32_t prchk_flags, void *ctx)
{
	const struct spdk_uuid *uuid;
	const uint8_t *nguid;
	const struct spdk_nvme_ctrlr_data *cdata;
	const struct spdk_nvme_ns_data *nsdata;
	const struct spdk_nvme_ctrlr_opts *opts;
	enum spdk_nvme_csi csi;
	uint32_t atomic_bs, phys_bs, bs;
	char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
	int rc;

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);
	csi = spdk_nvme_ns_get_csi(ns);
	opts = spdk_nvme_ctrlr_get_opts(ctrlr);

	switch (csi) {
	case SPDK_NVME_CSI_NVM:
		disk->product_name = "NVMe disk";
		break;
	case SPDK_NVME_CSI_ZNS:
		/* Zoned namespace: export zone geometry and limits to the bdev layer. */
		disk->product_name = "NVMe ZNS disk";
		disk->zoned = true;
		disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
		disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
					     spdk_nvme_ns_get_extended_sector_size(ns);
		disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
		disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
		break;
	default:
		SPDK_ERRLOG("unsupported CSI: %u\n", csi);
		return -ENOTSUP;
	}

	/* Identifier preference: NGUID, then namespace UUID, then (optionally)
	 * a UUID generated deterministically from serial number + NSID.
	 */
	nguid = spdk_nvme_ns_get_nguid(ns);
	if (!nguid) {
		uuid = spdk_nvme_ns_get_uuid(ns);
		if (uuid) {
			disk->uuid = *uuid;
		} else if (g_opts.generate_uuids) {
			spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
			rc = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns), &disk->uuid);
			if (rc < 0) {
				SPDK_ERRLOG("UUID generation failed (%s)\n", spdk_strerror(-rc));
				return rc;
			}
		}
	} else {
		memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
	}

	/* Bdev name is "<controller name>n<nsid>", mirroring kernel naming. */
	disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
	if (!disk->name) {
		return -ENOMEM;
	}

	disk->write_cache = 0;
	if (cdata->vwc.present) {
		/* Enable if the Volatile Write Cache exists */
		disk->write_cache = 1;
	}
	if (cdata->oncs.write_zeroes) {
		/* Write Zeroes NLB is a 0-based 16-bit field, so up to 64K blocks. */
		disk->max_write_zeroes = UINT16_MAX + 1;
	}
	disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
	disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
	disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
	disk->ctratt.raw = cdata->ctratt.raw;
	/* NVMe driver will split one request into multiple requests
	 * based on MDTS and stripe boundary, the bdev layer will use
	 * max_segment_size and max_num_segments to split one big IO
	 * into multiple requests, then small request can't run out
	 * of NVMe internal requests data structure.
	 */
	if (opts && opts->io_queue_requests) {
		disk->max_num_segments = opts->io_queue_requests / 2;
	}
	if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
		/* The nvme driver will try to split I/O that have too many
		 * SGEs, but it doesn't work if that last SGE doesn't end on
		 * an aggregate total that is block aligned. The bdev layer has
		 * a more robust splitting framework, so use that instead for
		 * this case. (See issue #3269.)
		 */
		uint16_t max_sges = spdk_nvme_ctrlr_get_max_sges(ctrlr);

		if (disk->max_num_segments == 0) {
			disk->max_num_segments = max_sges;
		} else {
			disk->max_num_segments = spdk_min(disk->max_num_segments, max_sges);
		}
	}
	disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);

	nsdata = spdk_nvme_ns_get_data(ns);
	bs = spdk_nvme_ns_get_sector_size(ns);
	atomic_bs = bs;
	phys_bs = bs;
	/* Atomic write size is only meaningful when the atomic boundary offset
	 * is zero; NAWUPF/AWUPF are 0-based counts of logical blocks.
	 */
	if (nsdata->nabo == 0) {
		if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
			atomic_bs = bs * (1 + nsdata->nawupf);
		} else {
			atomic_bs = bs * (1 + cdata->awupf);
		}
	}
	if (nsdata->nsfeat.optperf) {
		phys_bs = bs * (1 + nsdata->npwg);
	}
	disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);

	disk->md_len = spdk_nvme_ns_get_md_size(ns);
	if (disk->md_len != 0) {
		disk->md_interleave = nsdata->flbas.extended;
		disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
		if (disk->dif_type != SPDK_DIF_DISABLE) {
			disk->dif_is_head_of_md = nsdata->dps.md_start;
			disk->dif_check_flags = prchk_flags;
		}
	}

	if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
	      SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
		disk->acwu = 0;
	} else if (nsdata->nsfeat.ns_atomic_write_unit) {
		disk->acwu = nsdata->nacwu + 1; /* 0-based */
	} else {
		disk->acwu = cdata->acwu + 1; /* 0-based */
	}

	if (cdata->oncs.copy) {
		/* For now bdev interface allows only single segment copy */
		disk->max_copy = nsdata->mssrl;
	}

	disk->ctxt = ctx;
	disk->fn_table = &nvmelib_fn_table;
	disk->module = &nvme_if;

	return 0;
}
4241 :
4242 : static struct nvme_bdev *
4243 37 : nvme_bdev_alloc(void)
4244 : {
4245 : struct nvme_bdev *bdev;
4246 : int rc;
4247 :
4248 37 : bdev = calloc(1, sizeof(*bdev));
4249 37 : if (!bdev) {
4250 0 : SPDK_ERRLOG("bdev calloc() failed\n");
4251 0 : return NULL;
4252 : }
4253 :
4254 37 : if (g_opts.nvme_error_stat) {
4255 0 : bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
4256 0 : if (!bdev->err_stat) {
4257 0 : SPDK_ERRLOG("err_stat calloc() failed\n");
4258 0 : free(bdev);
4259 0 : return NULL;
4260 : }
4261 : }
4262 :
4263 37 : rc = pthread_mutex_init(&bdev->mutex, NULL);
4264 37 : if (rc != 0) {
4265 0 : free(bdev->err_stat);
4266 0 : free(bdev);
4267 0 : return NULL;
4268 : }
4269 :
4270 37 : bdev->ref = 1;
4271 37 : bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
4272 37 : bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
4273 37 : bdev->rr_min_io = UINT32_MAX;
4274 37 : TAILQ_INIT(&bdev->nvme_ns_list);
4275 :
4276 37 : return bdev;
4277 : }
4278 :
/* Create and register a new nvme_bdev for the given namespace: allocate the
 * bdev, fill in disk attributes, register the per-channel io_device, link the
 * namespace and the bdev into each other and into the bdev_ctrlr, then
 * register with the bdev layer. On registration failure the partial state is
 * unwound. Returns 0 on success or a negative errno.
 */
static int
nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
{
	struct nvme_bdev *bdev;
	struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
	int rc;

	bdev = nvme_bdev_alloc();
	if (bdev == NULL) {
		SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
		return -ENOMEM;
	}

	bdev->opal = nvme_ctrlr->opal_dev != NULL;

	rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
			      nvme_ns->ns, nvme_ctrlr->opts.prchk_flags, bdev);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to create NVMe disk\n");
		nvme_bdev_free(bdev);
		return rc;
	}

	/* The io_device must exist before spdk_bdev_register(), which may
	 * immediately create channels for the new bdev.
	 */
	spdk_io_device_register(bdev,
				bdev_nvme_create_bdev_channel_cb,
				bdev_nvme_destroy_bdev_channel_cb,
				sizeof(struct nvme_bdev_channel),
				bdev->disk.name);

	nvme_ns->bdev = bdev;
	bdev->nsid = nvme_ns->id;
	TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);

	bdev->nbdev_ctrlr = nbdev_ctrlr;
	TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);

	rc = spdk_bdev_register(&bdev->disk);
	if (rc != 0) {
		/* Unwind the linkage established above before freeing. */
		SPDK_ERRLOG("spdk_bdev_register() failed\n");
		spdk_io_device_unregister(bdev, NULL);
		nvme_ns->bdev = NULL;
		TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
		nvme_bdev_free(bdev);
		return rc;
	}

	return 0;
}
4327 :
4328 : static bool
4329 23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
4330 : {
4331 : const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
4332 : const struct spdk_uuid *uuid1, *uuid2;
4333 :
4334 23 : nsdata1 = spdk_nvme_ns_get_data(ns1);
4335 23 : nsdata2 = spdk_nvme_ns_get_data(ns2);
4336 23 : uuid1 = spdk_nvme_ns_get_uuid(ns1);
4337 23 : uuid2 = spdk_nvme_ns_get_uuid(ns2);
4338 :
4339 45 : return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
4340 22 : nsdata1->eui64 == nsdata2->eui64 &&
4341 21 : ((uuid1 == NULL && uuid2 == NULL) ||
4342 59 : (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
4343 18 : spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
4344 : }
4345 :
4346 : static bool
4347 0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
4348 : struct spdk_nvme_ctrlr_opts *opts)
4349 : {
4350 : struct nvme_probe_skip_entry *entry;
4351 :
4352 0 : TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
4353 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
4354 0 : return false;
4355 : }
4356 : }
4357 :
4358 0 : opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
4359 0 : opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
4360 0 : opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
4361 0 : opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
4362 0 : opts->disable_read_ana_log_page = true;
4363 :
4364 0 : SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
4365 :
4366 0 : return true;
4367 : }
4368 :
4369 : static void
4370 0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
4371 : {
4372 0 : struct nvme_ctrlr *nvme_ctrlr = ctx;
4373 :
4374 0 : if (spdk_nvme_cpl_is_error(cpl)) {
4375 0 : SPDK_WARNLOG("Abort failed. Resetting controller. sc is %u, sct is %u.\n", cpl->status.sc,
4376 : cpl->status.sct);
4377 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4378 0 : } else if (cpl->cdw0 & 0x1) {
4379 0 : SPDK_WARNLOG("Specified command could not be aborted.\n");
4380 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4381 : }
4382 0 : }
4383 :
/* NVMe driver timeout callback. qpair == NULL means an admin command timed
 * out. First checks for Controller Fatal Status (which forces a reset), then
 * applies the configured timeout action: abort the command (falling back to
 * reset on abort failure), reset the controller, or do nothing.
 */
static void
timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
	   struct spdk_nvme_qpair *qpair, uint16_t cid)
{
	struct nvme_ctrlr *nvme_ctrlr = cb_arg;
	union spdk_nvme_csts_register csts;
	int rc;

	assert(nvme_ctrlr->ctrlr == ctrlr);

	SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);

	/* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
	 * queue. (Note: qpair == NULL when there's an admin cmd timeout.) Otherwise we
	 * would submit another fabrics cmd on the admin queue to read CSTS and check for its
	 * completion recursively.
	 */
	if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
		csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
		if (csts.bits.cfs) {
			SPDK_ERRLOG("Controller Fatal Status, reset required\n");
			bdev_nvme_reset_ctrlr(nvme_ctrlr);
			return;
		}
	}

	switch (g_opts.action_on_timeout) {
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
		if (qpair) {
			/* Don't send abort to ctrlr when ctrlr is not available. */
			pthread_mutex_lock(&nvme_ctrlr->mutex);
			if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
				pthread_mutex_unlock(&nvme_ctrlr->mutex);
				SPDK_NOTICELOG("Quit abort. Ctrlr is not available.\n");
				return;
			}
			pthread_mutex_unlock(&nvme_ctrlr->mutex);

			rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
						       nvme_abort_cpl, nvme_ctrlr);
			if (rc == 0) {
				/* Outcome is handled in nvme_abort_cpl(). */
				return;
			}

			SPDK_ERRLOG("Unable to send abort. Resetting, rc is %d.\n", rc);
		}

		/* FALLTHROUGH */
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
		bdev_nvme_reset_ctrlr(nvme_ctrlr);
		break;
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
		SPDK_DEBUGLOG(bdev_nvme, "No action for nvme controller timeout.\n");
		break;
	default:
		SPDK_ERRLOG("An invalid timeout action value is found.\n");
		break;
	}
}
4443 :
4444 : static struct nvme_ns *
4445 50 : nvme_ns_alloc(void)
4446 : {
4447 : struct nvme_ns *nvme_ns;
4448 :
4449 50 : nvme_ns = calloc(1, sizeof(struct nvme_ns));
4450 50 : if (nvme_ns == NULL) {
4451 0 : return NULL;
4452 : }
4453 :
4454 50 : if (g_opts.io_path_stat) {
4455 0 : nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
4456 0 : if (nvme_ns->stat == NULL) {
4457 0 : free(nvme_ns);
4458 0 : return NULL;
4459 : }
4460 0 : spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
4461 : }
4462 :
4463 50 : return nvme_ns;
4464 : }
4465 :
/* Release an nvme_ns and its optional statistics buffer
 * (stat may be NULL; free(NULL) is a no-op).
 */
static void
nvme_ns_free(struct nvme_ns *nvme_ns)
{
	free(nvme_ns->stat);
	free(nvme_ns);
}
4472 :
/* Finish (successfully or not) populating one namespace. On success the
 * namespace takes a reference on its controller; on failure it is removed
 * from the controller's namespace tree and freed. If this namespace was part
 * of an async attach, decrement the outstanding-populate count and complete
 * the attach when it reaches zero.
 */
static void
nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
{
	struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
	struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;

	if (rc == 0) {
		nvme_ns->probe_ctx = NULL;
		pthread_mutex_lock(&nvme_ctrlr->mutex);
		nvme_ctrlr->ref++;
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
	} else {
		/* Population failed; undo the RB_INSERT done by the caller. */
		RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
		nvme_ns_free(nvme_ns);
	}

	if (ctx) {
		ctx->populates_in_progress--;
		if (ctx->populates_in_progress == 0) {
			nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
		}
	}
}
4496 :
/* Per-channel step of spdk_for_each_channel(): add an I/O path for the new
 * namespace to this bdev channel. A failure is logged and forwarded as the
 * iteration status, which aborts the channel walk.
 */
static void
bdev_nvme_add_io_path(struct spdk_io_channel_iter *i)
{
	struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
	int status;

	status = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
	if (status != 0) {
		SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
	}

	spdk_for_each_channel_continue(i, status);
}
4512 :
4513 : static void
4514 2 : bdev_nvme_delete_io_path(struct spdk_io_channel_iter *i)
4515 : {
4516 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4517 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4518 2 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4519 : struct nvme_io_path *io_path;
4520 :
4521 2 : io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
4522 2 : if (io_path != NULL) {
4523 2 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
4524 : }
4525 :
4526 2 : spdk_for_each_channel_continue(i, 0);
4527 2 : }
4528 :
/* Completion of the cleanup walk started in bdev_nvme_add_io_path_done():
 * all partially-added I/O paths have been removed, so fail the namespace
 * population.
 */
static void
bdev_nvme_add_io_path_failed(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);

	nvme_ctrlr_populate_namespace_done(nvme_ns, -1);
}
4536 :
4537 : static void
4538 12 : bdev_nvme_add_io_path_done(struct spdk_io_channel_iter *i, int status)
4539 : {
4540 12 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4541 12 : struct nvme_bdev *bdev = spdk_io_channel_iter_get_io_device(i);
4542 :
4543 12 : if (status == 0) {
4544 12 : nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
4545 : } else {
4546 : /* Delete the added io_paths and fail populating the namespace. */
4547 0 : spdk_for_each_channel(bdev,
4548 : bdev_nvme_delete_io_path,
4549 : nvme_ns,
4550 : bdev_nvme_add_io_path_failed);
4551 : }
4552 12 : }
4553 :
/* Attach an additional namespace (a new path) to an existing nvme_bdev for
 * multipath. The namespace must be shareable and identical (NGUID/EUI64/UUID/
 * CSI) to the namespaces already attached. On success, existing bdev channels
 * get a new I/O path added asynchronously; completion is reported through
 * bdev_nvme_add_io_path_done(). Returns 0 on success or -EINVAL.
 */
static int
nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
{
	struct nvme_ns *tmp_ns;
	const struct spdk_nvme_ns_data *nsdata;

	nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
	if (!nsdata->nmic.can_share) {
		SPDK_ERRLOG("Namespace cannot be shared.\n");
		return -EINVAL;
	}

	pthread_mutex_lock(&bdev->mutex);

	/* Compare against any already-attached namespace; they are all the
	 * same logical namespace, so the first is representative.
	 */
	tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
	assert(tmp_ns != NULL);

	if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
		pthread_mutex_unlock(&bdev->mutex);
		SPDK_ERRLOG("Namespaces are not identical.\n");
		return -EINVAL;
	}

	bdev->ref++;
	TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
	nvme_ns->bdev = bdev;

	pthread_mutex_unlock(&bdev->mutex);

	/* Add nvme_io_path to nvme_bdev_channels dynamically. */
	spdk_for_each_channel(bdev,
			      bdev_nvme_add_io_path,
			      nvme_ns,
			      bdev_nvme_add_io_path_done);

	return 0;
}
4591 :
/* Populate one newly discovered namespace: resolve its ANA state from the
 * cached log page, then either create a brand-new nvme_bdev for it or attach
 * it as an additional path to an existing bdev with the same NSID. The
 * add-path case completes asynchronously, so _done() is not called here for
 * a successful nvme_bdev_add_ns().
 */
static void
nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
{
	struct spdk_nvme_ns *ns;
	struct nvme_bdev *bdev;
	int rc = 0;

	ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
	if (!ns) {
		SPDK_DEBUGLOG(bdev_nvme, "Invalid NS %d\n", nvme_ns->id);
		rc = -EINVAL;
		goto done;
	}

	nvme_ns->ns = ns;
	/* Default to OPTIMIZED; refined below if an ANA log page is cached. */
	nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;

	if (nvme_ctrlr->ana_log_page != NULL) {
		bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
	}

	bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
	if (bdev == NULL) {
		rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
	} else {
		rc = nvme_bdev_add_ns(bdev, nvme_ns);
		if (rc == 0) {
			/* Completion is reported via bdev_nvme_add_io_path_done(). */
			return;
		}
	}
done:
	nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
}
4625 :
/* Final step of namespace depopulation: remove the namespace from the
 * controller's tree and, unless it is still referenced by a bdev, free it and
 * drop the controller reference taken when it was populated.
 */
static void
nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
{
	struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;

	assert(nvme_ctrlr != NULL);

	pthread_mutex_lock(&nvme_ctrlr->mutex);

	RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);

	if (nvme_ns->bdev != NULL) {
		/* Still attached to a bdev; the bdev teardown path owns the
		 * rest of the cleanup.
		 */
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return;
	}

	nvme_ns_free(nvme_ns);
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* May unregister the controller if this was its last reference. */
	nvme_ctrlr_release(nvme_ctrlr);
}
4647 :
/* Completion of the delete-io-path channel walk started in
 * nvme_ctrlr_depopulate_namespace(); now the namespace can be torn down
 * without any channel still referencing it.
 */
static void
bdev_nvme_delete_io_path_done(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);

	nvme_ctrlr_depopulate_namespace_done(nvme_ns);
}
4655 :
/* Depopulate a namespace that disappeared (or whose controller is going away).
 * Drops the namespace's reference on its bdev: the last reference unregisters
 * the whole bdev, while an intermediate reference removes just this path,
 * deleting the matching nvme_io_path from every bdev channel first.
 */
static void
nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
{
	struct nvme_bdev *bdev;

	spdk_poller_unregister(&nvme_ns->anatt_timer);

	bdev = nvme_ns->bdev;
	if (bdev != NULL) {
		pthread_mutex_lock(&bdev->mutex);

		assert(bdev->ref > 0);
		bdev->ref--;
		if (bdev->ref == 0) {
			pthread_mutex_unlock(&bdev->mutex);

			/* Bdev teardown handles the remaining namespace cleanup. */
			spdk_bdev_unregister(&bdev->disk, NULL, NULL);
		} else {
			/* spdk_bdev_unregister() is not called until the last nvme_ns is
			 * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
			 * and clear nvme_ns->bdev here.
			 */
			TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
			nvme_ns->bdev = NULL;

			pthread_mutex_unlock(&bdev->mutex);

			/* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
			 * we call depopulate_namespace_done() to avoid use-after-free.
			 */
			spdk_for_each_channel(bdev,
					      bdev_nvme_delete_io_path,
					      nvme_ns,
					      bdev_nvme_delete_io_path_done);
			return;
		}
	}

	nvme_ctrlr_depopulate_namespace_done(nvme_ns);
}
4696 :
4697 : static void
4698 61 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
4699 : struct nvme_async_probe_ctx *ctx)
4700 : {
4701 61 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
4702 : struct nvme_ns *nvme_ns, *next;
4703 : struct spdk_nvme_ns *ns;
4704 : struct nvme_bdev *bdev;
4705 : uint32_t nsid;
4706 : int rc;
4707 : uint64_t num_sectors;
4708 :
4709 61 : if (ctx) {
4710 : /* Initialize this count to 1 to handle the populate functions
4711 : * calling nvme_ctrlr_populate_namespace_done() immediately.
4712 : */
4713 45 : ctx->populates_in_progress = 1;
4714 : }
4715 :
4716 : /* First loop over our existing namespaces and see if they have been
4717 : * removed. */
4718 61 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
4719 65 : while (nvme_ns != NULL) {
4720 4 : next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
4721 :
4722 4 : if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
4723 : /* NS is still there or added again. Its attributes may have changed. */
4724 3 : ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
4725 3 : if (nvme_ns->ns != ns) {
4726 1 : assert(nvme_ns->ns == NULL);
4727 1 : nvme_ns->ns = ns;
4728 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was added\n", nvme_ns->id);
4729 : }
4730 :
4731 3 : num_sectors = spdk_nvme_ns_get_num_sectors(ns);
4732 3 : bdev = nvme_ns->bdev;
4733 3 : assert(bdev != NULL);
4734 3 : if (bdev->disk.blockcnt != num_sectors) {
4735 1 : SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
4736 : nvme_ns->id,
4737 : bdev->disk.name,
4738 : bdev->disk.blockcnt,
4739 : num_sectors);
4740 1 : rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
4741 1 : if (rc != 0) {
4742 0 : SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
4743 : bdev->disk.name, rc);
4744 : }
4745 : }
4746 : } else {
4747 : /* Namespace was removed */
4748 1 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
4749 : }
4750 :
4751 4 : nvme_ns = next;
4752 : }
4753 :
4754 : /* Loop through all of the namespaces at the nvme level and see if any of them are new */
4755 61 : nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
4756 114 : while (nsid != 0) {
4757 53 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
4758 :
4759 53 : if (nvme_ns == NULL) {
4760 : /* Found a new one */
4761 50 : nvme_ns = nvme_ns_alloc();
4762 50 : if (nvme_ns == NULL) {
4763 0 : SPDK_ERRLOG("Failed to allocate namespace\n");
4764 : /* This just fails to attach the namespace. It may work on a future attempt. */
4765 0 : continue;
4766 : }
4767 :
4768 50 : nvme_ns->id = nsid;
4769 50 : nvme_ns->ctrlr = nvme_ctrlr;
4770 :
4771 50 : nvme_ns->bdev = NULL;
4772 :
4773 50 : if (ctx) {
4774 49 : ctx->populates_in_progress++;
4775 : }
4776 50 : nvme_ns->probe_ctx = ctx;
4777 :
4778 50 : RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4779 :
4780 50 : nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
4781 : }
4782 :
4783 53 : nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
4784 : }
4785 :
4786 61 : if (ctx) {
4787 : /* Decrement this count now that the loop is over to account
4788 : * for the one we started with. If the count is then 0, we
4789 : * know any populate_namespace functions completed immediately,
4790 : * so we'll kick the callback here.
4791 : */
4792 45 : ctx->populates_in_progress--;
4793 45 : if (ctx->populates_in_progress == 0) {
4794 33 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
4795 : }
4796 : }
4797 :
4798 61 : }
4799 :
/* Depopulate every namespace tracked by the controller. The SAFE iterator is
 * required because depopulation may remove the namespace from the tree.
 */
static void
nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
{
	struct nvme_ns *nvme_ns, *tmp;

	RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
		nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
	}
}
4809 :
4810 : static uint32_t
4811 36 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
4812 : {
4813 36 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
4814 : const struct spdk_nvme_ctrlr_data *cdata;
4815 36 : uint32_t nsid, ns_count = 0;
4816 :
4817 36 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4818 :
4819 36 : for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
4820 80 : nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
4821 44 : ns_count++;
4822 : }
4823 :
4824 36 : return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
4825 36 : sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
4826 : sizeof(uint32_t);
4827 : }
4828 :
/* Callback for bdev_nvme_parse_ana_log_page(): apply the ANA state of one
 * ANA group descriptor to every namespace listed in that descriptor.
 *
 * cb_arg is the owning struct nvme_ctrlr. Always returns 0 so parsing
 * continues with the next descriptor.
 */
static int
nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
			  void *cb_arg)
{
	struct nvme_ctrlr *nvme_ctrlr = cb_arg;
	struct nvme_ns *nvme_ns;
	uint32_t i, nsid;

	for (i = 0; i < desc->num_of_nsid; i++) {
		nsid = desc->nsid[i];
		if (nsid == 0) {
			/* NSID 0 is not a valid namespace ID; skip it. */
			continue;
		}

		nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);

		assert(nvme_ns != NULL);
		if (nvme_ns == NULL) {
			/* Target told us that an inactive namespace had an ANA change */
			continue;
		}

		_nvme_ns_set_ana_state(nvme_ns, desc);
	}

	return 0;
}
4856 :
/* Stop tracking ANA state via the log page: release the log page buffer and
 * fall back to treating every active namespace as OPTIMIZED (clearing any
 * in-progress update flag). Called when reading the log page failed.
 */
static void
bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
{
	struct nvme_ns *nvme_ns;

	spdk_free(nvme_ctrlr->ana_log_page);
	nvme_ctrlr->ana_log_page = NULL;

	for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
	     nvme_ns != NULL;
	     nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
		nvme_ns->ana_state_updating = false;
		nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
	}
}
4872 :
/* Completion callback for the ANA log page read issued by
 * nvme_ctrlr_read_ana_log_page().
 *
 * On success, parse the page and update per-namespace ANA states; on failure
 * (including cpl == NULL for a submission error) disable ANA tracking.
 * Then clear the ana_log_page_updating flag under the controller mutex and
 * either finish a pending unregister or refresh the cached I/O paths.
 */
static void
nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_ctrlr *nvme_ctrlr = ctx;

	if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
		bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
					     nvme_ctrlr);
	} else {
		bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
	}

	pthread_mutex_lock(&nvme_ctrlr->mutex);

	assert(nvme_ctrlr->ana_log_page_updating == true);
	nvme_ctrlr->ana_log_page_updating = false;

	if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
		/* Unregister must be done outside the mutex. */
		pthread_mutex_unlock(&nvme_ctrlr->mutex);

		nvme_ctrlr_unregister(nvme_ctrlr);
	} else {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);

		bdev_nvme_clear_io_path_caches(nvme_ctrlr);
	}
}
4900 :
/* Issue an asynchronous read of the controller's ANA log page into
 * nvme_ctrlr->ana_log_page; nvme_ctrlr_read_ana_log_page_done() applies it.
 *
 * Returns 0 when the read was submitted, -EINVAL if ANA tracking is disabled
 * or the page grew beyond the pre-allocated buffer, -EBUSY if the controller
 * is unavailable or a read is already in flight, or the submission errno.
 */
static int
nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
{
	uint32_t ana_log_page_size;
	int rc;

	if (nvme_ctrlr->ana_log_page == NULL) {
		/* ANA tracking was disabled (or never enabled). */
		return -EINVAL;
	}

	ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);

	if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
		SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
			    ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
		return -EINVAL;
	}

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
	    nvme_ctrlr->ana_log_page_updating) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return -EBUSY;
	}

	/* Claim the update flag before submitting; cleared in the completion. */
	nvme_ctrlr->ana_log_page_updating = true;
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
					      SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
					      SPDK_NVME_GLOBAL_NS_TAG,
					      nvme_ctrlr->ana_log_page,
					      ana_log_page_size, 0,
					      nvme_ctrlr_read_ana_log_page_done,
					      nvme_ctrlr);
	if (rc != 0) {
		/* Run the completion path manually so the flag is cleared. */
		nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
	}

	return rc;
}
4942 :
/* No-op bdev event callback, used when a descriptor is opened only to look
 * up the bdev and the caller does not care about events on it.
 */
static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}
4947 :
/* Context carried across the spdk_for_each_channel() iteration started by
 * bdev_nvme_set_preferred_path().
 */
struct bdev_nvme_set_preferred_path_ctx {
	/* Open descriptor that keeps the bdev alive during the iteration. */
	struct spdk_bdev_desc *desc;
	/* Namespace whose I/O path should become preferred. */
	struct nvme_ns *nvme_ns;
	/* User completion callback and its argument. */
	bdev_nvme_set_preferred_path_cb cb_fn;
	void *cb_arg;
};
4954 :
/* Final step of bdev_nvme_set_preferred_path(): close the descriptor opened
 * at the start, invoke the user callback with the iteration status, and free
 * the context.
 */
static void
bdev_nvme_set_preferred_path_done(struct spdk_io_channel_iter *i, int status)
{
	struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);

	assert(ctx != NULL);
	assert(ctx->desc != NULL);
	assert(ctx->cb_fn != NULL);

	spdk_bdev_close(ctx->desc);

	ctx->cb_fn(ctx->cb_arg, status);

	free(ctx);
}
4970 :
/* Per-channel step of bdev_nvme_set_preferred_path(): move the I/O path that
 * belongs to ctx->nvme_ns to the head of this channel's io_path_list and
 * invalidate the cached current path so the next I/O re-selects it.
 */
static void
_bdev_nvme_set_preferred_path(struct spdk_io_channel_iter *i)
{
	struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
	struct nvme_io_path *io_path, *prev;

	/* Find the matching io_path and remember its predecessor so it can be
	 * unlinked with STAILQ_REMOVE_AFTER.
	 */
	prev = NULL;
	STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
		if (io_path->nvme_ns == ctx->nvme_ns) {
			break;
		}
		prev = io_path;
	}

	if (io_path != NULL) {
		if (prev != NULL) {
			/* prev == NULL means io_path is already at the head. */
			STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
			STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
		}

		/* We can set io_path to nbdev_ch->current_io_path directly here.
		 * However, it needs to be conditional. To simplify the code,
		 * just clear nbdev_ch->current_io_path and let find_io_path()
		 * fill it.
		 *
		 * Automatic failback may be disabled. Hence even if the io_path is
		 * already at the head, clear nbdev_ch->current_io_path.
		 */
		bdev_nvme_clear_current_io_path(nbdev_ch);
	}

	spdk_for_each_channel_continue(i, 0);
}
5006 :
/* Find the namespace of @nbdev that is reached through the controller with
 * the given cntlid and move it to the head of the bdev's namespace list, so
 * channels created later pick it up first.
 *
 * Returns the matched namespace, or NULL if no namespace belongs to a
 * controller with that cntlid. Caller holds nbdev->mutex.
 */
static struct nvme_ns *
bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
{
	struct nvme_ns *nvme_ns, *prev;
	const struct spdk_nvme_ctrlr_data *cdata;

	prev = NULL;
	TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
		cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);

		if (cdata->cntlid == cntlid) {
			break;
		}
		prev = nvme_ns;
	}

	if (nvme_ns != NULL && prev != NULL) {
		/* prev == NULL means the namespace is already at the head. */
		TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
		TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
	}

	return nvme_ns;
}
5030 :
5031 : /* This function supports only multipath mode. There is only a single I/O path
5032 : * for each NVMe-oF controller. Hence, just move the matched I/O path to the
5033 : * head of the I/O path list for each NVMe bdev channel.
5034 : *
5035 : * NVMe bdev channel may be acquired after completing this function. move the
5036 : * matched namespace to the head of the namespace list for the NVMe bdev too.
5037 : */
void
bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
			     bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
{
	struct bdev_nvme_set_preferred_path_ctx *ctx;
	struct spdk_bdev *bdev;
	struct nvme_bdev *nbdev;
	int rc = 0;

	assert(cb_fn != NULL);

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		SPDK_ERRLOG("Failed to alloc context.\n");
		rc = -ENOMEM;
		goto err_alloc;
	}

	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

	/* Open read-only; the descriptor only pins the bdev while iterating. */
	rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to open bdev %s.\n", name);
		goto err_open;
	}

	bdev = spdk_bdev_desc_get_bdev(ctx->desc);

	if (bdev->module != &nvme_if) {
		SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
		rc = -ENODEV;
		goto err_bdev;
	}

	nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);

	pthread_mutex_lock(&nbdev->mutex);

	/* Reorder the bdev's namespace list first (affects future channels). */
	ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
	if (ctx->nvme_ns == NULL) {
		pthread_mutex_unlock(&nbdev->mutex);

		SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
		rc = -ENODEV;
		goto err_bdev;
	}

	pthread_mutex_unlock(&nbdev->mutex);

	/* Then reorder the io_path list of every existing channel. */
	spdk_for_each_channel(nbdev,
			      _bdev_nvme_set_preferred_path,
			      ctx,
			      bdev_nvme_set_preferred_path_done);
	return;

err_bdev:
	spdk_bdev_close(ctx->desc);
err_open:
	free(ctx);
err_alloc:
	cb_fn(cb_arg, rc);
}
5101 :
/* Context carried across the spdk_for_each_channel() iteration started by
 * bdev_nvme_set_multipath_policy().
 */
struct bdev_nvme_set_multipath_policy_ctx {
	/* Open descriptor that keeps the bdev alive during the iteration. */
	struct spdk_bdev_desc *desc;
	/* User completion callback and its argument. */
	bdev_nvme_set_multipath_policy_cb cb_fn;
	void *cb_arg;
};
5107 :
/* Final step of bdev_nvme_set_multipath_policy(): close the descriptor,
 * report the iteration status to the user callback, and free the context.
 */
static void
bdev_nvme_set_multipath_policy_done(struct spdk_io_channel_iter *i, int status)
{
	struct bdev_nvme_set_multipath_policy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);

	assert(ctx != NULL);
	assert(ctx->desc != NULL);
	assert(ctx->cb_fn != NULL);

	spdk_bdev_close(ctx->desc);

	ctx->cb_fn(ctx->cb_arg, status);

	free(ctx);
}
5123 :
/* Per-channel step of bdev_nvme_set_multipath_policy(): copy the new policy,
 * selector and round-robin threshold from the bdev into this channel and
 * invalidate the cached current I/O path so it is re-selected under the new
 * policy.
 */
static void
_bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
	struct nvme_bdev *nbdev = spdk_io_channel_get_io_device(_ch);

	nbdev_ch->mp_policy = nbdev->mp_policy;
	nbdev_ch->mp_selector = nbdev->mp_selector;
	nbdev_ch->rr_min_io = nbdev->rr_min_io;
	bdev_nvme_clear_current_io_path(nbdev_ch);

	spdk_for_each_channel_continue(i, 0);
}
5138 :
5139 : void
5140 3 : bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy policy,
5141 : enum bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
5142 : bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
5143 : {
5144 : struct bdev_nvme_set_multipath_policy_ctx *ctx;
5145 : struct spdk_bdev *bdev;
5146 : struct nvme_bdev *nbdev;
5147 : int rc;
5148 :
5149 3 : assert(cb_fn != NULL);
5150 :
5151 3 : switch (policy) {
5152 1 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
5153 1 : break;
5154 2 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
5155 : switch (selector) {
5156 1 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
5157 1 : if (rr_min_io == UINT32_MAX) {
5158 0 : rr_min_io = 1;
5159 1 : } else if (rr_min_io == 0) {
5160 0 : rc = -EINVAL;
5161 0 : goto exit;
5162 : }
5163 1 : break;
5164 1 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
5165 1 : break;
5166 0 : default:
5167 0 : rc = -EINVAL;
5168 0 : goto exit;
5169 : }
5170 2 : break;
5171 0 : default:
5172 0 : rc = -EINVAL;
5173 0 : goto exit;
5174 : }
5175 :
5176 3 : ctx = calloc(1, sizeof(*ctx));
5177 3 : if (ctx == NULL) {
5178 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5179 0 : rc = -ENOMEM;
5180 0 : goto exit;
5181 : }
5182 :
5183 3 : ctx->cb_fn = cb_fn;
5184 3 : ctx->cb_arg = cb_arg;
5185 :
5186 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5187 3 : if (rc != 0) {
5188 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5189 0 : rc = -ENODEV;
5190 0 : goto err_open;
5191 : }
5192 :
5193 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5194 3 : if (bdev->module != &nvme_if) {
5195 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5196 0 : rc = -ENODEV;
5197 0 : goto err_module;
5198 : }
5199 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5200 :
5201 3 : pthread_mutex_lock(&nbdev->mutex);
5202 3 : nbdev->mp_policy = policy;
5203 3 : nbdev->mp_selector = selector;
5204 3 : nbdev->rr_min_io = rr_min_io;
5205 3 : pthread_mutex_unlock(&nbdev->mutex);
5206 :
5207 3 : spdk_for_each_channel(nbdev,
5208 : _bdev_nvme_set_multipath_policy,
5209 : ctx,
5210 : bdev_nvme_set_multipath_policy_done);
5211 3 : return;
5212 :
5213 0 : err_module:
5214 0 : spdk_bdev_close(ctx->desc);
5215 0 : err_open:
5216 0 : free(ctx);
5217 0 : exit:
5218 0 : cb_fn(cb_arg, rc);
5219 : }
5220 :
/* Asynchronous Event Request callback registered on the controller.
 * On a Namespace Attribute Changed notice, re-populate the namespace list;
 * on an ANA Change notice, re-read the ANA log page. Other events (and
 * failed AERs) are ignored.
 */
static void
aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_ctrlr *nvme_ctrlr = arg;
	union spdk_nvme_async_event_completion event;

	if (spdk_nvme_cpl_is_error(cpl)) {
		SPDK_WARNLOG("AER request execute failed\n");
		return;
	}

	event.raw = cpl->cdw0;
	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
		nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
	} else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
		   (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
		nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
	}
}
5241 :
/* Release the async probe context: drop the key references it holds
 * (spdk_keyring_put_key is a no-op for NULL keys) and free the context.
 */
static void
free_nvme_async_probe_ctx(struct nvme_async_probe_ctx *ctx)
{
	spdk_keyring_put_key(ctx->drv_opts.tls_psk);
	spdk_keyring_put_key(ctx->drv_opts.dhchap_key);
	spdk_keyring_put_key(ctx->drv_opts.dhchap_ctrlr_key);
	free(ctx);
}
5250 :
/* Report namespace-population completion to the user of the async probe.
 * If the probe itself already finished (probe_done), this side owns the
 * context and frees it; otherwise the probe completion path will.
 */
static void
populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
{
	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
	}

	ctx->namespaces_populated = true;
	if (ctx->probe_done) {
		/* The probe was already completed, so we need to free the context
		 * here. This can happen for cases like OCSSD, where we need to
		 * send additional commands to the SSD after attach.
		 */
		free_nvme_async_probe_ctx(ctx);
	}
}
5267 :
/* Periodic poller that detects hot-removed PCIe controllers by re-scanning
 * attached devices. Unregisters itself once no NVMe bdev controllers remain.
 */
static int
bdev_nvme_remove_poller(void *ctx)
{
	struct spdk_nvme_transport_id trid_pcie;

	if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
		/* Nothing left to watch; stop polling. */
		spdk_poller_unregister(&g_hotplug_poller);
		return SPDK_POLLER_IDLE;
	}

	memset(&trid_pcie, 0, sizeof(trid_pcie));
	spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);

	if (spdk_nvme_scan_attached(&trid_pcie)) {
		SPDK_ERRLOG_RATELIMIT("spdk_nvme_scan_attached() failed\n");
	}

	return SPDK_POLLER_BUSY;
}
5287 :
/* Final stage of nvme_ctrlr_create(): register the controller as an I/O
 * device, populate its namespaces (reporting through @ctx if non-NULL), and
 * make sure the hot-remove poller is running.
 */
static void
nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
		       struct nvme_async_probe_ctx *ctx)
{
	spdk_io_device_register(nvme_ctrlr,
				bdev_nvme_create_ctrlr_channel_cb,
				bdev_nvme_destroy_ctrlr_channel_cb,
				sizeof(struct nvme_ctrlr_channel),
				nvme_ctrlr->nbdev_ctrlr->name);

	nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);

	if (g_hotplug_poller == NULL) {
		g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
							NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
	}
}
5305 :
/* Completion of the initial ANA log page read issued during controller
 * creation. On failure the half-constructed controller is deleted and the
 * probe (if any) is failed; on success creation proceeds to
 * nvme_ctrlr_create_done().
 */
static void
nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_ctrlr *nvme_ctrlr = _ctx;
	struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;

	/* The probe context was parked on the controller while the read ran. */
	nvme_ctrlr->probe_ctx = NULL;

	if (spdk_nvme_cpl_is_error(cpl)) {
		nvme_ctrlr_delete(nvme_ctrlr);

		if (ctx != NULL) {
			ctx->reported_bdevs = 0;
			populate_namespaces_cb(ctx, -1);
		}
		return;
	}

	nvme_ctrlr_create_done(nvme_ctrlr, ctx);
}
5326 :
/* Allocate the ANA log page buffer and descriptor scratch area for a newly
 * created controller, then start the first asynchronous log page read
 * (completed by nvme_ctrlr_init_ana_log_page_done).
 *
 * On error the buffers allocated here are left attached to nvme_ctrlr; the
 * caller's error path (nvme_ctrlr_delete) is responsible for freeing them.
 * Returns 0 when the read was submitted, otherwise a negative errno.
 */
static int
nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
			     struct nvme_async_probe_ctx *ctx)
{
	struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;
	uint32_t ana_log_page_size;

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	/* Set buffer size enough to include maximum number of allowed namespaces. */
	ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
			    sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
			    sizeof(uint32_t);

	nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
						SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	if (nvme_ctrlr->ana_log_page == NULL) {
		SPDK_ERRLOG("could not allocate ANA log page buffer\n");
		return -ENXIO;
	}

	/* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned.
	 * Hence copy each descriptor to a temporary area when parsing it.
	 *
	 * Allocate a buffer whose size is as large as ANA log page buffer because
	 * we do not know the size of a descriptor until actually reading it.
	 */
	nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
	if (nvme_ctrlr->copied_ana_desc == NULL) {
		SPDK_ERRLOG("could not allocate a buffer to parse ANA descriptor\n");
		return -ENOMEM;
	}

	nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;

	/* Parked here so the completion can retrieve it. */
	nvme_ctrlr->probe_ctx = ctx;

	/* Then, set the read size only to include the current active namespaces. */
	ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);

	if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
		SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
			    ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
		return -EINVAL;
	}

	return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
						SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
						SPDK_NVME_GLOBAL_NS_TAG,
						nvme_ctrlr->ana_log_page,
						ana_log_page_size, 0,
						nvme_ctrlr_init_ana_log_page_done,
						nvme_ctrlr);
}
5382 :
5383 : /* hostnqn and subnqn were already verified before attaching a controller.
5384 : * Hence check only the multipath capability and cntlid here.
5385 : */
5386 : static bool
5387 16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
5388 : {
5389 : struct nvme_ctrlr *tmp;
5390 : const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
5391 :
5392 16 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5393 :
5394 16 : if (!cdata->cmic.multi_ctrlr) {
5395 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5396 0 : return false;
5397 : }
5398 :
5399 33 : TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
5400 18 : tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
5401 :
5402 18 : if (!tmp_cdata->cmic.multi_ctrlr) {
5403 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5404 0 : return false;
5405 : }
5406 18 : if (cdata->cntlid == tmp_cdata->cntlid) {
5407 1 : SPDK_ERRLOG("cntlid %u are duplicated.\n", tmp_cdata->cntlid);
5408 1 : return false;
5409 : }
5410 : }
5411 :
5412 15 : return true;
5413 : }
5414 :
5415 : static int
5416 60 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
5417 : {
5418 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
5419 60 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5420 60 : int rc = 0;
5421 :
5422 60 : pthread_mutex_lock(&g_bdev_nvme_mutex);
5423 :
5424 60 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
5425 60 : if (nbdev_ctrlr != NULL) {
5426 16 : if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
5427 1 : rc = -EINVAL;
5428 1 : goto exit;
5429 : }
5430 : } else {
5431 44 : nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
5432 44 : if (nbdev_ctrlr == NULL) {
5433 0 : SPDK_ERRLOG("Failed to allocate nvme_bdev_ctrlr.\n");
5434 0 : rc = -ENOMEM;
5435 0 : goto exit;
5436 : }
5437 44 : nbdev_ctrlr->name = strdup(name);
5438 44 : if (nbdev_ctrlr->name == NULL) {
5439 0 : SPDK_ERRLOG("Failed to allocate name of nvme_bdev_ctrlr.\n");
5440 0 : free(nbdev_ctrlr);
5441 0 : goto exit;
5442 : }
5443 44 : TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
5444 44 : TAILQ_INIT(&nbdev_ctrlr->bdevs);
5445 44 : TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
5446 : }
5447 59 : nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
5448 59 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
5449 60 : exit:
5450 60 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
5451 60 : return rc;
5452 : }
5453 :
/* Construct a struct nvme_ctrlr wrapping an attached spdk_nvme_ctrlr.
 *
 * Sets up the mutex, trid/path list, security keys (duplicating references
 * from the probe context so it can release its own), bdev options, admin
 * queue poller, timeout/AER/remove callbacks, and optionally Opal. Then
 * links the controller into its named group and either starts the initial
 * ANA log page read (if the controller reports ANA) or completes creation
 * immediately.
 *
 * Returns 0 on success (completion may still be asynchronous via ctx);
 * on error the partially built controller is torn down with
 * nvme_ctrlr_delete() and a negative errno is returned.
 */
static int
nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
		  const char *name,
		  const struct spdk_nvme_transport_id *trid,
		  struct nvme_async_probe_ctx *ctx)
{
	struct nvme_ctrlr *nvme_ctrlr;
	struct nvme_path_id *path_id;
	const struct spdk_nvme_ctrlr_data *cdata;
	int rc;

	nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
	if (nvme_ctrlr == NULL) {
		SPDK_ERRLOG("Failed to allocate device struct\n");
		return -ENOMEM;
	}

	rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
	if (rc != 0) {
		free(nvme_ctrlr);
		return rc;
	}

	TAILQ_INIT(&nvme_ctrlr->trids);
	RB_INIT(&nvme_ctrlr->namespaces);

	/* Get another reference to the key, so the first one can be released from probe_ctx */
	if (ctx != NULL) {
		if (ctx->drv_opts.tls_psk != NULL) {
			nvme_ctrlr->psk = spdk_keyring_get_key(
						  spdk_key_get_name(ctx->drv_opts.tls_psk));
			if (nvme_ctrlr->psk == NULL) {
				/* Could only happen if the key was removed in the meantime */
				SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
					    spdk_key_get_name(ctx->drv_opts.tls_psk));
				rc = -ENOKEY;
				goto err;
			}
		}

		if (ctx->drv_opts.dhchap_key != NULL) {
			nvme_ctrlr->dhchap_key = spdk_keyring_get_key(
							 spdk_key_get_name(ctx->drv_opts.dhchap_key));
			if (nvme_ctrlr->dhchap_key == NULL) {
				SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
					    spdk_key_get_name(ctx->drv_opts.dhchap_key));
				rc = -ENOKEY;
				goto err;
			}
		}

		if (ctx->drv_opts.dhchap_ctrlr_key != NULL) {
			nvme_ctrlr->dhchap_ctrlr_key =
				spdk_keyring_get_key(
					spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
			if (nvme_ctrlr->dhchap_ctrlr_key == NULL) {
				SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
					    spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
				rc = -ENOKEY;
				goto err;
			}
		}
	}

	path_id = calloc(1, sizeof(*path_id));
	if (path_id == NULL) {
		SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
		rc = -ENOMEM;
		goto err;
	}

	path_id->trid = *trid;
	if (ctx != NULL) {
		/* Record the host-side address/service id used for this path. */
		memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
		memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
	}
	nvme_ctrlr->active_path_id = path_id;
	TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);

	nvme_ctrlr->thread = spdk_get_thread();
	nvme_ctrlr->ctrlr = ctrlr;
	nvme_ctrlr->ref = 1;

	if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
		SPDK_ERRLOG("OCSSDs are not supported");
		rc = -ENOTSUP;
		goto err;
	}

	if (ctx != NULL) {
		memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
	} else {
		bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
	}

	nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
					  g_opts.nvme_adminq_poll_period_us);

	if (g_opts.timeout_us > 0) {
		/* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
		/* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
		uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
					  g_opts.timeout_us : g_opts.timeout_admin_us;
		spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
				adm_timeout_us, timeout_cb, nvme_ctrlr);
	}

	spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
	spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);

	if (spdk_nvme_ctrlr_get_flags(ctrlr) &
	    SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
		nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
	}

	rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
	if (rc != 0) {
		goto err;
	}

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	if (cdata->cmic.ana_reporting) {
		/* Creation completes asynchronously after the first ANA read. */
		rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
		if (rc == 0) {
			return 0;
		}
	} else {
		nvme_ctrlr_create_done(nvme_ctrlr, ctx);
		return 0;
	}

err:
	nvme_ctrlr_delete(nvme_ctrlr);
	return rc;
}
5590 :
/* Fill @opts with the module-wide default per-controller options:
 * no protection-information checks and the global reconnect/timeout values.
 */
void
bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts)
{
	opts->prchk_flags = 0;
	opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
	opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
	opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
}
5599 :
/* Attach callback for hot-inserted PCIe controllers discovered by
 * bdev_nvme_hotplug(): generate a unique "HotInNvmeN" name and create the
 * controller wrapper. Errors are logged but not propagated (probe context).
 */
static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
{
	char *name;

	name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
	if (!name) {
		SPDK_ERRLOG("Failed to assign name to NVMe device\n");
		return;
	}

	if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
		SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
	} else {
		SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
	}

	/* nvme_ctrlr_create() copies what it needs; the name is ours to free. */
	free(name);
}
5620 :
/* Start tearing down a controller: depopulate all namespaces and drop the
 * initial reference taken at creation (actual deletion happens when the
 * refcount reaches zero).
 */
static void
_nvme_ctrlr_destruct(void *ctx)
{
	struct nvme_ctrlr *nvme_ctrlr = ctx;

	nvme_ctrlr_depopulate_namespaces(nvme_ctrlr);
	nvme_ctrlr_release(nvme_ctrlr);
}
5629 :
/* Mark a controller for destruction. Caller holds nvme_ctrlr->mutex
 * ("unsafe" = no locking here).
 *
 * For an explicit (non-hotplug) delete of a PCIe controller, remember its
 * trid in g_skipped_nvme_ctrlrs so hotplug scans will not re-attach it.
 * Returns 0 on success, -EALREADY if destruction already started, or
 * -ENOMEM if the skip entry cannot be allocated.
 */
static int
bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
{
	struct nvme_probe_skip_entry *entry;

	/* The controller's destruction was already started */
	if (nvme_ctrlr->destruct) {
		return -EALREADY;
	}

	if (!hotplug &&
	    nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		entry = calloc(1, sizeof(*entry));
		if (!entry) {
			return -ENOMEM;
		}
		entry->trid = nvme_ctrlr->active_path_id->trid;
		TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
	}

	nvme_ctrlr->destruct = true;
	return 0;
}
5653 :
/* Locked wrapper around bdev_nvme_delete_ctrlr_unsafe() that also kicks off
 * the actual destruction when the mark succeeded. -EALREADY (destruction
 * already in progress) is translated to success for the caller.
 */
static int
bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
{
	int rc;

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	if (rc == 0) {
		_nvme_ctrlr_destruct(nvme_ctrlr);
	} else if (rc == -EALREADY) {
		rc = 0;
	}

	return rc;
}
5671 :
/* Remove callback registered with the NVMe driver: the underlying device
 * went away (hot removal), so delete the controller wrapper.
 */
static void
remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_ctrlr *nvme_ctrlr = cb_ctx;

	bdev_nvme_delete_ctrlr(nvme_ctrlr, true);
}
5679 :
/* Poller that drives an in-flight asynchronous hotplug probe to completion.
 * Unregisters itself when the probe context is gone or the probe finishes
 * (spdk_nvme_probe_poll_async returns something other than -EAGAIN).
 */
static int
bdev_nvme_hotplug_probe(void *arg)
{
	if (g_hotplug_probe_ctx == NULL) {
		spdk_poller_unregister(&g_hotplug_probe_poller);
		return SPDK_POLLER_IDLE;
	}

	if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
		g_hotplug_probe_ctx = NULL;
		spdk_poller_unregister(&g_hotplug_probe_poller);
	}

	return SPDK_POLLER_BUSY;
}
5695 :
/* Periodic hotplug poller: start an asynchronous PCIe probe for newly
 * inserted controllers (attach_cb handles matches) and register
 * bdev_nvme_hotplug_probe() to drive it. Skips a cycle while a previous
 * probe is still in flight.
 */
static int
bdev_nvme_hotplug(void *arg)
{
	struct spdk_nvme_transport_id trid_pcie;

	if (g_hotplug_probe_ctx) {
		return SPDK_POLLER_BUSY;
	}

	memset(&trid_pcie, 0, sizeof(trid_pcie));
	spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);

	g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
			      hotplug_probe_cb, attach_cb, NULL);

	if (g_hotplug_probe_ctx) {
		assert(g_hotplug_probe_poller == NULL);
		g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
	}

	return SPDK_POLLER_BUSY;
}
5718 :
/* Copy the current module-wide bdev_nvme options into @opts. */
void
bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
{
	*opts = g_opts;
}
5724 :
5725 : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
5726 : uint32_t reconnect_delay_sec,
5727 : uint32_t fast_io_fail_timeout_sec);
5728 :
5729 : static int
5730 0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
5731 : {
5732 0 : if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
5733 : /* Can't set timeout_admin_us without also setting timeout_us */
5734 0 : SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
5735 0 : return -EINVAL;
5736 : }
5737 :
5738 0 : if (opts->bdev_retry_count < -1) {
5739 0 : SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
5740 0 : return -EINVAL;
5741 : }
5742 :
5743 0 : if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
5744 0 : opts->reconnect_delay_sec,
5745 0 : opts->fast_io_fail_timeout_sec)) {
5746 0 : return -EINVAL;
5747 : }
5748 :
5749 0 : return 0;
5750 : }
5751 :
/* Replace the module-wide bdev_nvme options with @opts.
 *
 * Rejected (-EINVAL) if validation fails, and (-EPERM) if controllers
 * already exist after module init — the options only apply to controllers
 * created afterwards. Non-zero RDMA tunables are forwarded to the NVMe
 * transport layer before the globals are updated.
 */
int
bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
{
	int ret;

	ret = bdev_nvme_validate_opts(opts);
	if (ret) {
		SPDK_WARNLOG("Failed to set nvme opts.\n");
		return ret;
	}

	if (g_bdev_nvme_init_thread != NULL) {
		if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
			return -EPERM;
		}
	}

	if (opts->rdma_srq_size != 0 ||
	    opts->rdma_max_cq_size != 0 ||
	    opts->rdma_cm_event_timeout_ms != 0) {
		struct spdk_nvme_transport_opts drv_opts;

		/* Read-modify-write: only override the fields that were set. */
		spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
		if (opts->rdma_srq_size != 0) {
			drv_opts.rdma_srq_size = opts->rdma_srq_size;
		}
		if (opts->rdma_max_cq_size != 0) {
			drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
		}
		if (opts->rdma_cm_event_timeout_ms != 0) {
			drv_opts.rdma_cm_event_timeout_ms = opts->rdma_cm_event_timeout_ms;
		}

		ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
		if (ret) {
			SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
			return ret;
		}
	}

	g_opts = *opts;

	return 0;
}
5796 :
/* Message payload for bdev_nvme_set_hotplug(), delivered to the init thread
 * by set_nvme_hotplug_period_cb().
 */
struct set_nvme_hotplug_ctx {
	/* Poll period to apply, in microseconds. */
	uint64_t period_us;
	/* Whether hotplug monitoring should be enabled. */
	bool enabled;
	/* Optional user callback invoked once the change is applied. */
	spdk_msg_fn fn;
	void *fn_ctx;
};
5803 :
/* Runs on the init thread: swap the hotplug poller for either the full
 * insert/remove poller (enabled) or the remove-only poller (disabled),
 * record the new settings, notify the optional user callback, and free the
 * message context.
 */
static void
set_nvme_hotplug_period_cb(void *_ctx)
{
	struct set_nvme_hotplug_ctx *ctx = _ctx;

	spdk_poller_unregister(&g_hotplug_poller);
	if (ctx->enabled) {
		g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
	} else {
		g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
							NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
	}

	g_nvme_hotplug_poll_period_us = ctx->period_us;
	g_nvme_hotplug_enabled = ctx->enabled;
	if (ctx->fn) {
		ctx->fn(ctx->fn_ctx);
	}

	free(ctx);
}
5825 :
5826 : int
5827 0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
5828 : {
5829 : struct set_nvme_hotplug_ctx *ctx;
5830 :
5831 0 : if (enabled == true && !spdk_process_is_primary()) {
5832 0 : return -EPERM;
5833 : }
5834 :
5835 0 : ctx = calloc(1, sizeof(*ctx));
5836 0 : if (ctx == NULL) {
5837 0 : return -ENOMEM;
5838 : }
5839 :
5840 0 : period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
5841 0 : ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
5842 0 : ctx->enabled = enabled;
5843 0 : ctx->fn = cb;
5844 0 : ctx->fn_ctx = cb_ctx;
5845 :
5846 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
5847 0 : return 0;
5848 : }
5849 :
5850 : static void
5851 45 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
5852 : struct nvme_async_probe_ctx *ctx)
5853 : {
5854 : struct nvme_ns *nvme_ns;
5855 : struct nvme_bdev *nvme_bdev;
5856 : size_t j;
5857 :
5858 45 : assert(nvme_ctrlr != NULL);
5859 :
5860 45 : if (ctx->names == NULL) {
5861 0 : ctx->reported_bdevs = 0;
5862 0 : populate_namespaces_cb(ctx, 0);
5863 0 : return;
5864 : }
5865 :
5866 : /*
5867 : * Report the new bdevs that were created in this call.
5868 : * There can be more than one bdev per NVMe controller.
5869 : */
5870 45 : j = 0;
5871 45 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5872 92 : while (nvme_ns != NULL) {
5873 47 : nvme_bdev = nvme_ns->bdev;
5874 47 : if (j < ctx->max_bdevs) {
5875 47 : ctx->names[j] = nvme_bdev->disk.name;
5876 47 : j++;
5877 : } else {
5878 0 : SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n",
5879 : ctx->max_bdevs);
5880 0 : ctx->reported_bdevs = 0;
5881 0 : populate_namespaces_cb(ctx, -ERANGE);
5882 0 : return;
5883 : }
5884 :
5885 47 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5886 : }
5887 :
5888 45 : ctx->reported_bdevs = j;
5889 45 : populate_namespaces_cb(ctx, 0);
5890 : }
5891 :
5892 : static int
5893 9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5894 : struct spdk_nvme_ctrlr *new_ctrlr,
5895 : struct spdk_nvme_transport_id *trid)
5896 : {
5897 : struct nvme_path_id *tmp_trid;
5898 :
5899 9 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
5900 0 : SPDK_ERRLOG("PCIe failover is not supported.\n");
5901 0 : return -ENOTSUP;
5902 : }
5903 :
5904 : /* Currently we only support failover to the same transport type. */
5905 9 : if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
5906 0 : SPDK_WARNLOG("Failover from trtype: %s to a different trtype: %s is not supported currently\n",
5907 : spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
5908 : spdk_nvme_transport_id_trtype_str(trid->trtype));
5909 0 : return -EINVAL;
5910 : }
5911 :
5912 :
5913 : /* Currently we only support failover to the same NQN. */
5914 9 : if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
5915 0 : SPDK_WARNLOG("Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
5916 : nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
5917 0 : return -EINVAL;
5918 : }
5919 :
5920 : /* Skip all the other checks if we've already registered this path. */
5921 21 : TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
5922 12 : if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
5923 0 : SPDK_WARNLOG("This path (traddr: %s subnqn: %s) is already registered\n", trid->traddr,
5924 : trid->subnqn);
5925 0 : return -EALREADY;
5926 : }
5927 : }
5928 :
5929 9 : return 0;
5930 : }
5931 :
5932 : static int
5933 9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
5934 : struct spdk_nvme_ctrlr *new_ctrlr)
5935 : {
5936 : struct nvme_ns *nvme_ns;
5937 : struct spdk_nvme_ns *new_ns;
5938 :
5939 9 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5940 9 : while (nvme_ns != NULL) {
5941 0 : new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
5942 0 : assert(new_ns != NULL);
5943 :
5944 0 : if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
5945 0 : return -EINVAL;
5946 : }
5947 :
5948 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5949 : }
5950 :
5951 9 : return 0;
5952 : }
5953 :
/* Register trid as an additional (currently disconnected) failover path.
 * Caller must hold nvme_ctrlr->mutex. New paths are inserted before any
 * already-failed path so healthy alternates are tried first on failover.
 */
static int
_bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
			      struct spdk_nvme_transport_id *trid)
{
	struct nvme_path_id *active_id, *new_trid, *tmp_trid;

	new_trid = calloc(1, sizeof(*new_trid));
	if (new_trid == NULL) {
		return -ENOMEM;
	}
	new_trid->trid = *trid;

	active_id = nvme_ctrlr->active_path_id;
	assert(active_id != NULL);
	assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));

	/* Skip the active trid not to replace it until it is failed. */
	tmp_trid = TAILQ_NEXT(active_id, link);
	if (tmp_trid == NULL) {
		goto add_tail;
	}

	/* It means the trid is failed if its last failed time is non-zero.
	 * Insert the new alternate trid before any failed trid.
	 */
	TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
		if (tmp_trid->last_failed_tsc != 0) {
			TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
			return 0;
		}
	}

	/* No failed trid found after the active one; append at the end. */
add_tail:
	TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
	return 0;
}
5990 :
/* This is the case that a secondary path is added to an existing
 * nvme_ctrlr for failover. After checking if it can access the same
 * namespaces as the primary path, it is disconnected until failover occurs.
 */
static int
bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
			     struct spdk_nvme_ctrlr *new_ctrlr,
			     struct spdk_nvme_transport_id *trid)
{
	int rc;

	assert(nvme_ctrlr != NULL);

	pthread_mutex_lock(&nvme_ctrlr->mutex);

	rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
	if (rc != 0) {
		goto exit;
	}

	rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
	if (rc != 0) {
		goto exit;
	}

	rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);

exit:
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* Only the transport id is kept; the temporary connection is dropped
	 * regardless of the outcome.
	 */
	spdk_nvme_detach(new_ctrlr);

	return rc;
}
6025 :
6026 : static void
6027 46 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6028 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
6029 : {
6030 46 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6031 : struct nvme_async_probe_ctx *ctx;
6032 : int rc;
6033 :
6034 46 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
6035 46 : ctx->ctrlr_attached = true;
6036 :
6037 46 : rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
6038 46 : if (rc != 0) {
6039 1 : ctx->reported_bdevs = 0;
6040 1 : populate_namespaces_cb(ctx, rc);
6041 : }
6042 46 : }
6043 :
6044 : static void
6045 4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6046 : struct spdk_nvme_ctrlr *ctrlr,
6047 : const struct spdk_nvme_ctrlr_opts *opts)
6048 : {
6049 4 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6050 : struct nvme_ctrlr *nvme_ctrlr;
6051 : struct nvme_async_probe_ctx *ctx;
6052 : int rc;
6053 :
6054 4 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
6055 4 : ctx->ctrlr_attached = true;
6056 :
6057 4 : nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
6058 4 : if (nvme_ctrlr) {
6059 4 : rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
6060 : } else {
6061 0 : rc = -ENODEV;
6062 : }
6063 :
6064 4 : ctx->reported_bdevs = 0;
6065 4 : populate_namespaces_cb(ctx, rc);
6066 4 : }
6067 :
/* Poller that drives an asynchronous controller connect to completion.
 * -EAGAIN from spdk_nvme_probe_poll_async() means "still connecting";
 * anything else means the probe finished (successfully or not).
 */
static int
bdev_nvme_async_poll(void *arg)
{
	struct nvme_async_probe_ctx *ctx = arg;
	int rc;

	rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
	if (spdk_unlikely(rc != -EAGAIN)) {
		ctx->probe_done = true;
		spdk_poller_unregister(&ctx->poller);
		if (!ctx->ctrlr_attached) {
			/* The probe is done, but no controller was attached.
			 * That means we had a failure, so report -EIO back to
			 * the caller (usually the RPC). populate_namespaces_cb()
			 * will take care of freeing the nvme_async_probe_ctx.
			 */
			ctx->reported_bdevs = 0;
			populate_namespaces_cb(ctx, -EIO);
		} else if (ctx->namespaces_populated) {
			/* The namespaces for the attached controller were all
			 * populated and the response was already sent to the
			 * caller (usually the RPC). So free the context here.
			 */
			free_nvme_async_probe_ctx(ctx);
		}
	}

	return SPDK_POLLER_BUSY;
}
6097 :
/* Validate the combination of I/O error resiliency parameters.
 *
 * ctrlr_loss_timeout_sec: -1 means reconnect forever, 0 disables reconnect
 * retries, a positive value bounds total reconnect time. reconnect_delay_sec
 * and fast_io_fail_timeout_sec must be consistent with it: a non-zero loss
 * timeout requires a non-zero reconnect delay, and the delay must not exceed
 * either timeout. Returns true when the combination is usable.
 */
static bool
bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
		uint32_t reconnect_delay_sec,
		uint32_t fast_io_fail_timeout_sec)
{
	if (ctrlr_loss_timeout_sec < -1) {
		SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
		return false;
	} else if (ctrlr_loss_timeout_sec == -1) {
		if (reconnect_delay_sec == 0) {
			SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
			return false;
		} else if (fast_io_fail_timeout_sec != 0 &&
			   fast_io_fail_timeout_sec < reconnect_delay_sec) {
			/* Fix: the old message misspelled the parameter as
			 * "fast_io-fail_timeout_sec"; the sibling branch below
			 * spells it correctly.
			 */
			SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
			return false;
		}
	} else if (ctrlr_loss_timeout_sec != 0) {
		if (reconnect_delay_sec == 0) {
			SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
			return false;
		} else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
			SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
			return false;
		} else if (fast_io_fail_timeout_sec != 0) {
			if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
				SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
				return false;
			} else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
				SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
				return false;
			}
		}
	} else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
		SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
		return false;
	}

	return true;
}
6138 :
/* Read a pre-shared key from file fname into buf (at most bufsz - 1 bytes,
 * NUL-padded). The file must not be readable by group/other and must fit in
 * the buffer. Returns 0 on success or a negative errno.
 *
 * NOTE(review): there is a stat/fopen TOCTOU window here; using fstat() on
 * the open file would be stricter — confirm whether that matters for PSKs.
 */
static int
bdev_nvme_load_psk(const char *fname, char *buf, size_t bufsz)
{
	FILE *psk_file;
	struct stat statbuf;
	size_t nread;
#define TCP_PSK_INVALID_PERMISSIONS 0177

	if (stat(fname, &statbuf) != 0) {
		SPDK_ERRLOG("Could not read permissions for PSK file\n");
		return -EACCES;
	}

	/* Refuse keys readable or writable by anyone but the owner. */
	if ((statbuf.st_mode & TCP_PSK_INVALID_PERMISSIONS) != 0) {
		SPDK_ERRLOG("Incorrect permissions for PSK file\n");
		return -EPERM;
	}
	if ((size_t)statbuf.st_size >= bufsz) {
		SPDK_ERRLOG("Invalid PSK: too long\n");
		return -EINVAL;
	}
	psk_file = fopen(fname, "r");
	if (psk_file == NULL) {
		SPDK_ERRLOG("Could not open PSK file\n");
		return -EINVAL;
	}

	memset(buf, 0, bufsz);
	/* Fix: fread() returns size_t; the old code funneled it through a
	 * signed int and compared it against off_t, risking sign/width
	 * conversion warnings. Keep the count unsigned throughout.
	 */
	nread = fread(buf, 1, (size_t)statbuf.st_size, psk_file);
	if (nread != (size_t)statbuf.st_size) {
		SPDK_ERRLOG("Failed to read PSK\n");
		fclose(psk_file);
		return -EINVAL;
	}

	fclose(psk_file);
	return 0;
}
6177 :
/* Connect to an NVMe controller (or add a failover path to an existing one)
 * and create bdevs for its active namespaces.
 *
 * The connect is asynchronous: on success a poller is registered to drive the
 * probe and cb_fn is invoked later, with up to `count` created bdev names
 * written into names[]. Returns 0 when the attach was started, or a negative
 * errno when the request could not be started at all.
 */
int
bdev_nvme_create(struct spdk_nvme_transport_id *trid,
		 const char *base_name,
		 const char **names,
		 uint32_t count,
		 spdk_bdev_create_nvme_fn cb_fn,
		 void *cb_ctx,
		 struct spdk_nvme_ctrlr_opts *drv_opts,
		 struct nvme_ctrlr_opts *bdev_opts,
		 bool multipath)
{
	struct nvme_probe_skip_entry *entry, *tmp;
	struct nvme_async_probe_ctx *ctx;
	spdk_nvme_attach_cb attach_cb;
	int rc, len;

	/* TODO expand this check to include both the host and target TRIDs.
	 * Only if both are the same should we fail.
	 */
	if (nvme_ctrlr_get(trid, drv_opts->hostnqn) != NULL) {
		SPDK_ERRLOG("A controller with the provided trid (traddr: %s, hostnqn: %s) "
			    "already exists.\n", trid->traddr, drv_opts->hostnqn);
		return -EEXIST;
	}

	len = strnlen(base_name, SPDK_CONTROLLER_NAME_MAX);

	if (len == 0 || len == SPDK_CONTROLLER_NAME_MAX) {
		SPDK_ERRLOG("controller name must be between 1 and %d characters\n", SPDK_CONTROLLER_NAME_MAX - 1);
		return -EINVAL;
	}

	/* Reject inconsistent reconnect/failover timeout combinations up front. */
	if (bdev_opts != NULL &&
	    !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
			    bdev_opts->reconnect_delay_sec,
			    bdev_opts->fast_io_fail_timeout_sec)) {
		return -EINVAL;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		return -ENOMEM;
	}
	ctx->base_name = base_name;
	ctx->names = names;
	ctx->max_bdevs = count;
	ctx->cb_fn = cb_fn;
	ctx->cb_ctx = cb_ctx;
	ctx->trid = *trid;

	if (bdev_opts) {
		memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
	} else {
		bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
	}

	/* A previously skipped PCIe device may be re-attached; drop it from the
	 * skip list so probing can find it again.
	 */
	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
			if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
				TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
				free(entry);
				break;
			}
		}
	}

	/* Start from the caller's driver options and overlay the module-level
	 * transport settings.
	 */
	memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
	ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
	ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
	ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
	ctx->drv_opts.disable_read_ana_log_page = true;
	ctx->drv_opts.transport_tos = g_opts.transport_tos;

	if (ctx->bdev_opts.psk[0] != '\0') {
		/* Try to use the keyring first */
		ctx->drv_opts.tls_psk = spdk_keyring_get_key(ctx->bdev_opts.psk);
		if (ctx->drv_opts.tls_psk == NULL) {
			/* Not a keyring name; treat the value as a PSK file path. */
			rc = bdev_nvme_load_psk(ctx->bdev_opts.psk,
						ctx->drv_opts.psk, sizeof(ctx->drv_opts.psk));
			if (rc != 0) {
				SPDK_ERRLOG("Could not load PSK from %s\n", ctx->bdev_opts.psk);
				free_nvme_async_probe_ctx(ctx);
				return rc;
			}
		}
	}

	if (ctx->bdev_opts.dhchap_key != NULL) {
		ctx->drv_opts.dhchap_key = spdk_keyring_get_key(ctx->bdev_opts.dhchap_key);
		if (ctx->drv_opts.dhchap_key == NULL) {
			SPDK_ERRLOG("Could not load DH-HMAC-CHAP key: %s\n",
				    ctx->bdev_opts.dhchap_key);
			free_nvme_async_probe_ctx(ctx);
			return -ENOKEY;
		}

		ctx->drv_opts.dhchap_digests = g_opts.dhchap_digests;
		ctx->drv_opts.dhchap_dhgroups = g_opts.dhchap_dhgroups;
	}
	if (ctx->bdev_opts.dhchap_ctrlr_key != NULL) {
		ctx->drv_opts.dhchap_ctrlr_key =
			spdk_keyring_get_key(ctx->bdev_opts.dhchap_ctrlr_key);
		if (ctx->drv_opts.dhchap_ctrlr_key == NULL) {
			SPDK_ERRLOG("Could not load DH-HMAC-CHAP controller key: %s\n",
				    ctx->bdev_opts.dhchap_ctrlr_key);
			free_nvme_async_probe_ctx(ctx);
			return -ENOKEY;
		}
	}

	/* A new name (or multipath mode) attaches a new controller; an existing
	 * name without multipath registers the connection as a failover path.
	 */
	if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || multipath) {
		attach_cb = connect_attach_cb;
	} else {
		attach_cb = connect_set_failover_cb;
	}

	ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
	if (ctx->probe_ctx == NULL) {
		SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
		free_nvme_async_probe_ctx(ctx);
		return -ENODEV;
	}
	ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);

	return 0;
}
6304 :
/* Tracks an asynchronous bdev_nvme_delete() request until the matching path
 * has actually disappeared (or a timeout elapses).
 */
struct bdev_nvme_delete_ctx {
	char *name;
	/* Filter describing which path(s) were requested for deletion. */
	struct nvme_path_id path_id;
	bdev_nvme_delete_done_fn delete_done;
	void *delete_done_ctx;
	/* Absolute tick deadline; past it the delete is reported as timed out. */
	uint64_t timeout_ticks;
	struct spdk_poller *poller;
};
6313 :
6314 : static void
6315 2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
6316 : {
6317 2 : if (ctx != NULL) {
6318 1 : free(ctx->name);
6319 1 : free(ctx);
6320 : }
6321 2 : }
6322 :
/* Return true if path p matches the (possibly partial) filter path_id.
 * Zeroed/empty fields in path_id act as wildcards; only the fields the
 * caller filled in are compared. Address fields are compared
 * case-insensitively; NQN and host id fields use exact strcmp.
 */
static bool
nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
{
	if (path_id->trid.trtype != 0) {
		/* Custom transports are identified by their string, not the enum. */
		if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
			if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
				return false;
			}
		} else {
			if (path_id->trid.trtype != p->trid.trtype) {
				return false;
			}
		}
	}

	if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
		if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
			return false;
		}
	}

	if (path_id->trid.adrfam != 0) {
		if (path_id->trid.adrfam != p->trid.adrfam) {
			return false;
		}
	}

	if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
		if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
			return false;
		}
	}

	if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
		if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
			return false;
		}
	}

	if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
		if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
			return false;
		}
	}

	if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
		if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
			return false;
		}
	}

	return true;
}
6376 :
/* Check whether any controller grouped under bdev controller `name` still
 * has a path matching path_id. Used to poll for completion of an async
 * delete. Takes g_bdev_nvme_mutex and each controller's mutex in turn.
 */
static bool
nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
{
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *ctrlr;
	struct nvme_path_id *p;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
	if (!nbdev_ctrlr) {
		/* The whole controller is gone, so the path is gone as well. */
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		return false;
	}

	TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
		pthread_mutex_lock(&ctrlr->mutex);
		TAILQ_FOREACH(p, &ctrlr->trids, link) {
			if (nvme_path_id_compare(p, path_id)) {
				pthread_mutex_unlock(&ctrlr->mutex);
				pthread_mutex_unlock(&g_bdev_nvme_mutex);
				return true;
			}
		}
		pthread_mutex_unlock(&ctrlr->mutex);
	}
	pthread_mutex_unlock(&g_bdev_nvme_mutex);

	return false;
}
6406 :
6407 : static int
6408 2 : bdev_nvme_delete_complete_poll(void *arg)
6409 : {
6410 2 : struct bdev_nvme_delete_ctx *ctx = arg;
6411 2 : int rc = 0;
6412 :
6413 2 : if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
6414 1 : if (ctx->timeout_ticks > spdk_get_ticks()) {
6415 1 : return SPDK_POLLER_BUSY;
6416 : }
6417 :
6418 0 : SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
6419 0 : rc = -ETIMEDOUT;
6420 : }
6421 :
6422 1 : spdk_poller_unregister(&ctx->poller);
6423 :
6424 1 : ctx->delete_done(ctx->delete_done_ctx, rc);
6425 1 : free_bdev_nvme_delete_ctx(ctx);
6426 :
6427 1 : return SPDK_POLLER_BUSY;
6428 : }
6429 :
/* Remove the path(s) of nvme_ctrlr that match path_id.
 *
 * Inactive matching paths are unlinked and freed. If the active path (always
 * the head of the trids list) matches too, the controller is destructed (no
 * alternative path) or failed over (alternative exists). Returns 0 if
 * anything was removed, -ENXIO if nothing matched, or a negative errno.
 */
static int
_bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
{
	struct nvme_path_id *p, *t;
	spdk_msg_fn msg_fn;
	int rc = -ENXIO;

	pthread_mutex_lock(&nvme_ctrlr->mutex);

	/* Walk backwards so removal does not disturb the iteration, stopping
	 * at the list head, which is the active path.
	 */
	TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
		if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
			break;
		}

		if (!nvme_path_id_compare(p, path_id)) {
			continue;
		}

		/* We are not using the specified path. */
		TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
		free(p);
		rc = 0;
	}

	/* After the loop p is either NULL (empty list) or the active path. */
	if (p == NULL || !nvme_path_id_compare(p, path_id)) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return rc;
	}

	/* If we made it here, then this path is a match! Now we need to remove it. */

	/* This is the active path in use right now. The active path is always the first in the list. */
	assert(p == nvme_ctrlr->active_path_id);

	if (!TAILQ_NEXT(p, link)) {
		/* The current path is the only path. */
		msg_fn = _nvme_ctrlr_destruct;
		rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
	} else {
		/* There is an alternative path. */
		msg_fn = _bdev_nvme_reset_ctrlr;
		rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
	}

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	if (rc == 0) {
		/* Complete the destruct/failover on the controller's own thread. */
		spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
	} else if (rc == -EALREADY) {
		/* Deletion/failover already underway; report success. */
		rc = 0;
	}

	return rc;
}
6484 :
/* Delete the path(s) matching path_id from every nvme_ctrlr grouped under
 * the bdev controller `name`.
 *
 * If delete_done is non-NULL, it is invoked once the matching path has
 * actually disappeared (polled for up to 10 seconds) or with -ETIMEDOUT.
 * Returns 0 if at least one path deletion was started, -ENXIO if nothing
 * matched, or a negative errno on other failures.
 */
int
bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
		 bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
{
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *nvme_ctrlr, *tmp_nvme_ctrlr;
	struct bdev_nvme_delete_ctx *ctx = NULL;
	int rc = -ENXIO, _rc;

	if (name == NULL || path_id == NULL) {
		rc = -EINVAL;
		goto exit;
	}

	pthread_mutex_lock(&g_bdev_nvme_mutex);

	nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
	if (nbdev_ctrlr == NULL) {
		pthread_mutex_unlock(&g_bdev_nvme_mutex);

		SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
		rc = -ENODEV;
		goto exit;
	}

	TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
		_rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
		if (_rc < 0 && _rc != -ENXIO) {
			/* Hard failure; abort immediately. -ENXIO only means this
			 * particular controller had no matching path.
			 */
			pthread_mutex_unlock(&g_bdev_nvme_mutex);
			rc = _rc;
			goto exit;
		} else if (_rc == 0) {
			/* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
			 * was deleted successfully. To remember the successful deletion,
			 * overwrite rc only if _rc is zero.
			 */
			rc = 0;
		}
	}

	pthread_mutex_unlock(&g_bdev_nvme_mutex);

	if (rc != 0 || delete_done == NULL) {
		/* Nothing was deleted, or no completion notification requested. */
		goto exit;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
		rc = -ENOMEM;
		goto exit;
	}

	ctx->name = strdup(name);
	if (ctx->name == NULL) {
		SPDK_ERRLOG("Failed to copy controller name for deletion\n");
		rc = -ENOMEM;
		goto exit;
	}

	ctx->delete_done = delete_done;
	ctx->delete_done_ctx = delete_done_ctx;
	ctx->path_id = *path_id;
	/* Allow up to 10 seconds for the path to actually disappear. */
	ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
	ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
	if (ctx->poller == NULL) {
		SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
		rc = -ENOMEM;
		goto exit;
	}

exit:
	if (rc != 0) {
		/* On failure the completion poller never runs; free ctx here. */
		free_bdev_nvme_delete_ctx(ctx);
	}

	return rc;
}
6563 :
/* Logging helpers that tag messages with the discovery service's target
 * address (traddr:trsvcid).
 */
#define DISCOVERY_INFOLOG(ctx, format, ...) \
	SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);

#define DISCOVERY_ERRLOG(ctx, format, ...) \
	SPDK_ERRLOG("Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
6569 :
/* One entry from a discovery log page, along with the name and options used
 * when attaching the subsystem it describes.
 */
struct discovery_entry_ctx {
	char name[128];
	struct spdk_nvme_transport_id trid;
	struct spdk_nvme_ctrlr_opts drv_opts;
	/* Raw log page entry as received; compared verbatim on re-fetch. */
	struct spdk_nvmf_discovery_log_page_entry entry;
	TAILQ_ENTRY(discovery_entry_ctx) tailq;
	/* Back pointer to the owning discovery service. */
	struct discovery_ctx *ctx;
};
6578 :
/* State for one running discovery service connection. */
struct discovery_ctx {
	char *name;
	spdk_bdev_nvme_start_discovery_fn start_cb_fn;
	spdk_bdev_nvme_stop_discovery_fn stop_cb_fn;
	void *cb_ctx;
	struct spdk_nvme_probe_ctx *probe_ctx;
	struct spdk_nvme_detach_ctx *detach_ctx;
	/* Connection to the discovery controller itself. */
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_transport_id trid;
	struct discovery_entry_ctx *entry_ctx_in_use;
	struct spdk_poller *poller;
	struct spdk_nvme_ctrlr_opts drv_opts;
	struct nvme_ctrlr_opts bdev_opts;
	/* Most recently fetched discovery log page; NULL when none is held. */
	struct spdk_nvmf_discovery_log_page *log_page;
	TAILQ_ENTRY(discovery_ctx) tailq;
	/* NVM subsystem entries attached through this discovery service. */
	TAILQ_HEAD(, discovery_entry_ctx) nvm_entry_ctxs;
	/* Entries that refer to further discovery subsystems. */
	TAILQ_HEAD(, discovery_entry_ctx) discovery_entry_ctxs;
	int rc;
	bool wait_for_attach;
	uint64_t timeout_ticks;
	/* Denotes that the discovery service is being started. We're waiting
	 * for the initial connection to the discovery controller to be
	 * established and attach discovered NVM ctrlrs.
	 */
	bool initializing;
	/* Denotes if a discovery is currently in progress for this context.
	 * That includes connecting to newly discovered subsystems. Used to
	 * ensure we do not start a new discovery until an existing one is
	 * complete.
	 */
	bool in_progress;

	/* Denotes if another discovery is needed after the one in progress
	 * completes. Set when we receive an AER completion while a discovery
	 * is already in progress.
	 */
	bool pending;

	/* Signal to the discovery context poller that it should stop the
	 * discovery service, including detaching from the current discovery
	 * controller.
	 */
	bool stop;

	struct spdk_thread *calling_thread;
	uint32_t index;
	/* Number of NVM controller attaches still in flight. */
	uint32_t attach_in_progress;
	char *hostnqn;

	/* Denotes if the discovery service was started by the mdns discovery.
	 */
	bool from_mdns_discovery_service;
};
6632 :
TAILQ_HEAD(discovery_ctxs, discovery_ctx);
/* All discovery services currently running in this process. */
static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);

static void get_discovery_log_page(struct discovery_ctx *ctx);
6637 :
6638 : static void
6639 0 : free_discovery_ctx(struct discovery_ctx *ctx)
6640 : {
6641 0 : free(ctx->log_page);
6642 0 : free(ctx->hostnqn);
6643 0 : free(ctx->name);
6644 0 : free(ctx);
6645 0 : }
6646 :
6647 : static void
6648 0 : discovery_complete(struct discovery_ctx *ctx)
6649 : {
6650 0 : ctx->initializing = false;
6651 0 : ctx->in_progress = false;
6652 0 : if (ctx->pending) {
6653 0 : ctx->pending = false;
6654 0 : get_discovery_log_page(ctx);
6655 : }
6656 0 : }
6657 :
6658 : static void
6659 0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
6660 : struct spdk_nvmf_discovery_log_page_entry *entry)
6661 : {
6662 : char *space;
6663 :
6664 0 : trid->trtype = entry->trtype;
6665 0 : trid->adrfam = entry->adrfam;
6666 0 : memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
6667 0 : memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
6668 : /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
6669 : * before call to this function trid->subnqn is zeroed out, we need
6670 : * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
6671 : * remains 0. Then we can shorten the string (replace ' ' with 0) if required
6672 : */
6673 0 : memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
6674 :
6675 : /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
6676 : * But the log page entries typically pad them with spaces, not zeroes.
6677 : * So add a NULL terminator to each of these fields at the appropriate
6678 : * location.
6679 : */
6680 0 : space = strchr(trid->traddr, ' ');
6681 0 : if (space) {
6682 0 : *space = 0;
6683 : }
6684 0 : space = strchr(trid->trsvcid, ' ');
6685 0 : if (space) {
6686 0 : *space = 0;
6687 : }
6688 0 : space = strchr(trid->subnqn, ' ');
6689 0 : if (space) {
6690 0 : *space = 0;
6691 : }
6692 0 : }
6693 :
6694 : static void
6695 0 : _stop_discovery(void *_ctx)
6696 : {
6697 0 : struct discovery_ctx *ctx = _ctx;
6698 :
6699 0 : if (ctx->attach_in_progress > 0) {
6700 0 : spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
6701 0 : return;
6702 : }
6703 :
6704 0 : ctx->stop = true;
6705 :
6706 0 : while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
6707 : struct discovery_entry_ctx *entry_ctx;
6708 0 : struct nvme_path_id path = {};
6709 :
6710 0 : entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
6711 0 : path.trid = entry_ctx->trid;
6712 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
6713 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
6714 0 : free(entry_ctx);
6715 : }
6716 :
6717 0 : while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
6718 : struct discovery_entry_ctx *entry_ctx;
6719 :
6720 0 : entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
6721 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
6722 0 : free(entry_ctx);
6723 : }
6724 :
6725 0 : free(ctx->entry_ctx_in_use);
6726 0 : ctx->entry_ctx_in_use = NULL;
6727 : }
6728 :
6729 : static void
6730 0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
6731 : {
6732 0 : ctx->stop_cb_fn = cb_fn;
6733 0 : ctx->cb_ctx = cb_ctx;
6734 :
6735 0 : if (ctx->attach_in_progress > 0) {
6736 0 : DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
6737 : ctx->attach_in_progress);
6738 : }
6739 :
6740 0 : _stop_discovery(ctx);
6741 0 : }
6742 :
6743 : static void
6744 2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
6745 : {
6746 : struct discovery_ctx *d_ctx;
6747 : struct nvme_path_id *path_id;
6748 2 : struct spdk_nvme_transport_id trid = {};
6749 : struct discovery_entry_ctx *entry_ctx, *tmp;
6750 :
6751 2 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
6752 :
6753 2 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
6754 0 : TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
6755 0 : build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
6756 0 : if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
6757 0 : continue;
6758 : }
6759 :
6760 0 : TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
6761 0 : free(entry_ctx);
6762 0 : DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
6763 : trid.subnqn, trid.traddr, trid.trsvcid);
6764 :
6765 : /* Fail discovery ctrlr to force reattach attempt */
6766 0 : spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
6767 : }
6768 : }
6769 2 : }
6770 :
/* Prune NVM subsystems that no longer appear in the most recently fetched
 * discovery log page: delete their bdev controllers and forget their entries.
 * Consumes and frees ctx->log_page, then signals completion of this pass.
 */
static void
discovery_remove_controllers(struct discovery_ctx *ctx)
{
	struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
	struct discovery_entry_ctx *entry_ctx, *tmp;
	struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
	struct spdk_nvme_transport_id old_trid = {};
	uint64_t numrec, i;
	bool found;

	numrec = from_le64(&log_page->numrec);
	TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
		found = false;
		old_entry = &entry_ctx->entry;
		build_trid_from_log_page_entry(&old_trid, old_entry);
		for (i = 0; i < numrec; i++) {
			new_entry = &log_page->entries[i];
			/* Entries are matched byte-for-byte, not only by trid. */
			if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
				DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
						  old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
				found = true;
				break;
			}
		}
		if (!found) {
			struct nvme_path_id path = {};

			DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
					  old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);

			/* The subsystem vanished from the log page: delete its bdev
			 * controller and release the tracking entry.
			 */
			path.trid = entry_ctx->trid;
			bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
			TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
			free(entry_ctx);
		}
	}
	/* The log page buffer was handed to us by the fetch callback; done with it. */
	free(log_page);
	ctx->log_page = NULL;
	discovery_complete(ctx);
}
6811 :
6812 : static void
6813 0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
6814 : {
6815 0 : ctx->timeout_ticks = 0;
6816 0 : ctx->rc = status;
6817 0 : if (ctx->start_cb_fn) {
6818 0 : ctx->start_cb_fn(ctx->cb_ctx, status);
6819 0 : ctx->start_cb_fn = NULL;
6820 0 : ctx->cb_ctx = NULL;
6821 : }
6822 0 : }
6823 :
/* Per-controller completion for bdev_nvme_create() issued from the log page
 * handler. When the last outstanding attach finishes, either finalize startup
 * (and tear down on startup failure) or prune vanished subsystems.
 * NOTE(review): bdev_count and rc are unused here; errors are tracked via
 * ctx->rc instead — presumably set elsewhere; confirm against callers.
 */
static void
discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
{
	struct discovery_entry_ctx *entry_ctx = cb_ctx;
	struct discovery_ctx *ctx = entry_ctx->ctx;

	DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
	ctx->attach_in_progress--;
	if (ctx->attach_in_progress == 0) {
		complete_discovery_start(ctx, ctx->rc);
		if (ctx->initializing && ctx->rc != 0) {
			/* Startup failed: stop the discovery service entirely. */
			DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
			stop_discovery(ctx, NULL, ctx->cb_ctx);
		} else {
			discovery_remove_controllers(ctx);
		}
	}
}
6842 :
6843 : static struct discovery_entry_ctx *
6844 0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
6845 : {
6846 : struct discovery_entry_ctx *new_ctx;
6847 :
6848 0 : new_ctx = calloc(1, sizeof(*new_ctx));
6849 0 : if (new_ctx == NULL) {
6850 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6851 0 : return NULL;
6852 : }
6853 :
6854 0 : new_ctx->ctx = ctx;
6855 0 : memcpy(&new_ctx->trid, trid, sizeof(*trid));
6856 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
6857 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
6858 0 : return new_ctx;
6859 : }
6860 :
/* Completion callback for the discovery log page fetch. Rebuilds the list of
 * referral (discovery-subtype) endpoints from scratch and attaches a bdev
 * controller for every NVM-subtype entry that is not already known. Removal
 * of entries that disappeared happens in discovery_remove_controllers(),
 * invoked either here (no attaches pending) or from the last attach callback.
 * NOTE(review): the early error return leaves ctx->in_progress set (it was set
 * by get_discovery_log_page) — presumably cleared on reconnect; confirm.
 */
static void
discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
		      struct spdk_nvmf_discovery_log_page *log_page)
{
	struct discovery_ctx *ctx = cb_arg;
	struct discovery_entry_ctx *entry_ctx, *tmp;
	struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
	uint64_t numrec, i;
	bool found;

	if (rc || spdk_nvme_cpl_is_error(cpl)) {
		DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
		return;
	}

	/* Take ownership of the log page buffer until processing completes. */
	ctx->log_page = log_page;
	assert(ctx->attach_in_progress == 0);
	numrec = from_le64(&log_page->numrec);
	/* Discard the old referral list; it is rebuilt from this log page. */
	TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
		TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
		free(entry_ctx);
	}
	for (i = 0; i < numrec; i++) {
		found = false;
		new_entry = &log_page->entries[i];
		if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
		    new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
			/* Referral to another discovery service: remember it as a
			 * fallback endpoint, do not attach a controller.
			 */
			struct discovery_entry_ctx *new_ctx;
			struct spdk_nvme_transport_id trid = {};

			build_trid_from_log_page_entry(&trid, new_entry);
			new_ctx = create_discovery_entry_ctx(ctx, &trid);
			if (new_ctx == NULL) {
				DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
				break;
			}

			TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
			continue;
		}
		/* NVM subsystem: check whether this exact entry is already attached. */
		TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
			old_entry = &entry_ctx->entry;
			if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
				found = true;
				break;
			}
		}
		if (!found) {
			struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
			struct discovery_ctx *d_ctx;

			/* Search all discovery services for the same subnqn: a match
			 * means this is an additional path to an existing subsystem,
			 * so reuse that controller name instead of minting a new one.
			 */
			TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
				TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
					if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
						    sizeof(new_entry->subnqn))) {
						break;
					}
				}
				if (subnqn_ctx) {
					break;
				}
			}

			new_ctx = calloc(1, sizeof(*new_ctx));
			if (new_ctx == NULL) {
				DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
				break;
			}

			new_ctx->ctx = ctx;
			memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
			build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
			if (subnqn_ctx) {
				snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
				DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
						  new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
						  new_ctx->name);
			} else {
				/* Brand-new subsystem: name it <base><index>. */
				snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
				DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
						  new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
						  new_ctx->name);
			}
			spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
			snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
			rc = bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
					      discovery_attach_controller_done, new_ctx,
					      &new_ctx->drv_opts, &ctx->bdev_opts, true);
			if (rc == 0) {
				TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
				ctx->attach_in_progress++;
			} else {
				DISCOVERY_ERRLOG(ctx, "bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
			}
		}
	}

	/* With no attaches outstanding, prune vanished subsystems immediately;
	 * otherwise the last attach callback does it.
	 */
	if (ctx->attach_in_progress == 0) {
		discovery_remove_controllers(ctx);
	}
}
6962 :
/* Kick off an asynchronous fetch of the discovery log page; completion is
 * delivered to discovery_log_page_cb. Only one fetch may be in flight.
 */
static void
get_discovery_log_page(struct discovery_ctx *ctx)
{
	int rc;

	assert(ctx->in_progress == false);
	ctx->in_progress = true;
	rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
	if (rc != 0) {
		/* NOTE(review): on failure in_progress stays true and the INFOLOG
		 * below still claims the command was sent — presumably recovery
		 * relies on the admin-queue failure path in discovery_poller;
		 * confirm intended.
		 */
		DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
	}
	DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
}
6976 :
6977 : static void
6978 0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
6979 : {
6980 0 : struct discovery_ctx *ctx = arg;
6981 0 : uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
6982 :
6983 0 : if (spdk_nvme_cpl_is_error(cpl)) {
6984 0 : DISCOVERY_ERRLOG(ctx, "aer failed\n");
6985 0 : return;
6986 : }
6987 :
6988 0 : if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
6989 0 : DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
6990 0 : return;
6991 : }
6992 :
6993 0 : DISCOVERY_INFOLOG(ctx, "got aer\n");
6994 0 : if (ctx->in_progress) {
6995 0 : ctx->pending = true;
6996 0 : return;
6997 : }
6998 :
6999 0 : get_discovery_log_page(ctx);
7000 : }
7001 :
7002 : static void
7003 0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
7004 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
7005 : {
7006 0 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
7007 : struct discovery_ctx *ctx;
7008 :
7009 0 : ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
7010 :
7011 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
7012 0 : ctx->probe_ctx = NULL;
7013 0 : ctx->ctrlr = ctrlr;
7014 :
7015 0 : if (ctx->rc != 0) {
7016 0 : DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
7017 : ctx->rc);
7018 0 : return;
7019 : }
7020 :
7021 0 : spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
7022 : }
7023 :
/* Discovery service state machine, driven by a repeating poller. Exactly one
 * branch runs per tick, chosen by which of detach_ctx / stop / probe_ctx /
 * ctrlr is set: (1) finish an in-flight detach, (2) tear down on stop,
 * (3) start connecting to the next discovery endpoint, (4) poll an in-flight
 * connect, or (5) steady state — service admin completions (incl. AERs).
 */
static int
discovery_poller(void *arg)
{
	struct discovery_ctx *ctx = arg;
	struct spdk_nvme_transport_id *trid;
	int rc;

	if (ctx->detach_ctx) {
		/* A detach of the discovery ctrlr is in flight; poll it to completion. */
		rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
		if (rc != -EAGAIN) {
			ctx->detach_ctx = NULL;
			ctx->ctrlr = NULL;
		}
	} else if (ctx->stop) {
		/* Stop requested: detach the ctrlr first (handled by the branch
		 * above on later ticks), then unregister and notify.
		 */
		if (ctx->ctrlr != NULL) {
			rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
			if (rc == 0) {
				return SPDK_POLLER_BUSY;
			}
			DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
		}
		spdk_poller_unregister(&ctx->poller);
		TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
		assert(ctx->start_cb_fn == NULL);
		if (ctx->stop_cb_fn != NULL) {
			ctx->stop_cb_fn(ctx->cb_ctx);
		}
		free_discovery_ctx(ctx);
	} else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
		/* Not connected: take the next candidate endpoint and connect. */
		if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
			DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
			assert(ctx->initializing);
			spdk_poller_unregister(&ctx->poller);
			TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
			complete_discovery_start(ctx, -ETIMEDOUT);
			stop_discovery(ctx, NULL, NULL);
			free_discovery_ctx(ctx);
			return SPDK_POLLER_BUSY;
		}

		assert(ctx->entry_ctx_in_use == NULL);
		ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
		TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
		trid = &ctx->entry_ctx_in_use->trid;
		ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
		if (ctx->probe_ctx) {
			/* Poll the connect attempt at a faster 1ms cadence. */
			spdk_poller_unregister(&ctx->poller);
			ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
		} else {
			/* Connect could not even start: return the endpoint to the
			 * candidate list and retry on a later tick.
			 */
			DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
			TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
			ctx->entry_ctx_in_use = NULL;
		}
	} else if (ctx->probe_ctx) {
		/* Connect in progress. */
		if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
			DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
			complete_discovery_start(ctx, -ETIMEDOUT);
			return SPDK_POLLER_BUSY;
		}

		rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
		if (rc != -EAGAIN) {
			if (ctx->rc != 0) {
				/* discovery_attach_cb observed a startup failure. */
				assert(ctx->initializing);
				stop_discovery(ctx, NULL, ctx->cb_ctx);
			} else {
				assert(rc == 0);
				DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
				ctx->rc = rc;
				get_discovery_log_page(ctx);
			}
		}
	} else {
		/* Steady state: connected, no probe or detach outstanding. */
		if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
			DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
			complete_discovery_start(ctx, -ETIMEDOUT);
			/* We need to wait until all NVM ctrlrs are attached before we stop the
			 * discovery service to make sure we don't detach a ctrlr that is still
			 * being attached.
			 */
			if (ctx->attach_in_progress == 0) {
				stop_discovery(ctx, NULL, ctx->cb_ctx);
				return SPDK_POLLER_BUSY;
			}
		}

		rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
		if (rc < 0) {
			/* Admin queue failed: back off to 1s polling, return the
			 * endpoint to the candidate list, and detach so a later
			 * tick reconnects (possibly to a referral endpoint).
			 */
			spdk_poller_unregister(&ctx->poller);
			ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
			TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
			ctx->entry_ctx_in_use = NULL;

			rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
			if (rc != 0) {
				DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
				ctx->ctrlr = NULL;
			}
		}
	}

	return SPDK_POLLER_BUSY;
}
7127 :
/* Message handler that starts the discovery state machine; sent to
 * g_bdev_nvme_init_thread by bdev_nvme_start_discovery().
 */
static void
start_discovery_poller(void *arg)
{
	struct discovery_ctx *ctx = arg;

	TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
	/* Poll once per second (interval is in microseconds). */
	ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
}
7136 :
/* Create and start a named discovery service against the given discovery
 * endpoint. Rejects duplicate service names and endpoints already covered by
 * an existing service. On success the poller is started on the init thread
 * and cb_fn (if any) fires once the initial set of NVM ctrlrs is attached.
 * Returns 0, or -EEXIST / -ENOMEM on failure.
 */
int
bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
			  const char *base_name,
			  struct spdk_nvme_ctrlr_opts *drv_opts,
			  struct nvme_ctrlr_opts *bdev_opts,
			  uint64_t attach_timeout,
			  bool from_mdns,
			  spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
{
	struct discovery_ctx *ctx;
	struct discovery_entry_ctx *discovery_entry_ctx;

	/* Discovery controllers always use the well-known discovery NQN. */
	snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
	TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
		if (strcmp(ctx->name, base_name) == 0) {
			return -EEXIST;
		}

		/* Reject endpoints already in use (connected or queued). */
		if (ctx->entry_ctx_in_use != NULL) {
			if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
				return -EEXIST;
			}
		}

		TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
			if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
				return -EEXIST;
			}
		}
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		return -ENOMEM;
	}

	ctx->name = strdup(base_name);
	if (ctx->name == NULL) {
		free_discovery_ctx(ctx);
		return -ENOMEM;
	}
	memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
	memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
	ctx->from_mdns_discovery_service = from_mdns;
	ctx->bdev_opts.from_discovery_service = true;
	ctx->calling_thread = spdk_get_thread();
	ctx->start_cb_fn = cb_fn;
	ctx->cb_ctx = cb_ctx;
	ctx->initializing = true;
	if (ctx->start_cb_fn) {
		/* We can use this when dumping json to denote if this RPC parameter
		 * was specified or not.
		 */
		ctx->wait_for_attach = true;
	}
	if (attach_timeout != 0) {
		/* attach_timeout is in milliseconds; convert to tick units. */
		ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
				     spdk_get_ticks_hz() / 1000ull;
	}
	TAILQ_INIT(&ctx->nvm_entry_ctxs);
	TAILQ_INIT(&ctx->discovery_entry_ctxs);
	memcpy(&ctx->trid, trid, sizeof(*trid));
	/* Even if user did not specify hostnqn, we can still strdup("\0"); */
	ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
	if (ctx->hostnqn == NULL) {
		free_discovery_ctx(ctx);
		return -ENOMEM;
	}
	/* Seed the candidate list with the user-supplied endpoint; referrals
	 * discovered later are appended by discovery_log_page_cb.
	 */
	discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
	if (discovery_entry_ctx == NULL) {
		DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
		free_discovery_ctx(ctx);
		return -ENOMEM;
	}

	TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
	/* The poller must run on the module's init thread. */
	spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
	return 0;
}
7216 :
7217 : int
7218 0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
7219 : {
7220 : struct discovery_ctx *ctx;
7221 :
7222 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7223 0 : if (strcmp(name, ctx->name) == 0) {
7224 0 : if (ctx->stop) {
7225 0 : return -EALREADY;
7226 : }
7227 : /* If we're still starting the discovery service and ->rc is non-zero, we're
7228 : * going to stop it as soon as we can
7229 : */
7230 0 : if (ctx->initializing && ctx->rc != 0) {
7231 0 : return -EALREADY;
7232 : }
7233 0 : stop_discovery(ctx, cb_fn, cb_ctx);
7234 0 : return 0;
7235 : }
7236 : }
7237 :
7238 0 : return -ENOENT;
7239 : }
7240 :
/* Module init hook: record the init thread (discovery pollers are started on
 * it later) and register the per-thread nvme poll group io_device, keyed off
 * the global controller list. Always returns 0.
 */
static int
bdev_nvme_library_init(void)
{
	g_bdev_nvme_init_thread = spdk_get_thread();

	spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
				bdev_nvme_destroy_poll_group_cb,
				sizeof(struct nvme_poll_group), "nvme_poll_groups");

	return 0;
}
7252 :
/* Second stage of module shutdown: flag every controller for destruction and
 * dispatch the destruct to each controller's owning thread. If no controllers
 * remain, finish the module immediately; otherwise the finish completes when
 * the last controller is released (NOTE(review): presumably on the path that
 * removes the final entry from g_nvme_bdev_ctrlrs — confirm elsewhere).
 */
static void
bdev_nvme_fini_destruct_ctrlrs(void)
{
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *nvme_ctrlr;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
		TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
			/* Per-controller lock guards the destruct flag. */
			pthread_mutex_lock(&nvme_ctrlr->mutex);
			if (nvme_ctrlr->destruct) {
				/* This controller's destruction was already started
				 * before the application started shutting down
				 */
				pthread_mutex_unlock(&nvme_ctrlr->mutex);
				continue;
			}
			nvme_ctrlr->destruct = true;
			pthread_mutex_unlock(&nvme_ctrlr->mutex);

			/* Destruction must run on the controller's own thread. */
			spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
					     nvme_ctrlr);
		}
	}

	g_bdev_nvme_module_finish = true;
	if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
		spdk_bdev_module_fini_done();
		return;
	}

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}
7288 :
7289 : static void
7290 0 : check_discovery_fini(void *arg)
7291 : {
7292 0 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7293 0 : bdev_nvme_fini_destruct_ctrlrs();
7294 : }
7295 0 : }
7296 :
/* Module finish hook: tear down hotplug state, free the skipped-controller
 * list, then destruct controllers — directly if no discovery services are
 * running, otherwise after each service stops (via check_discovery_fini).
 */
static void
bdev_nvme_library_fini(void)
{
	struct nvme_probe_skip_entry *entry, *entry_tmp;
	struct discovery_ctx *ctx;

	spdk_poller_unregister(&g_hotplug_poller);
	free(g_hotplug_probe_ctx);
	g_hotplug_probe_ctx = NULL;

	TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
		TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
		free(entry);
	}

	/* Finish runs on the same thread that initialized the module. */
	assert(spdk_get_thread() == g_bdev_nvme_init_thread);
	if (TAILQ_EMPTY(&g_discovery_ctxs)) {
		bdev_nvme_fini_destruct_ctrlrs();
	} else {
		TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
			stop_discovery(ctx, check_discovery_fini, NULL);
		}
	}
}
7321 :
/* After the controller reported a protection-information (PI) error, re-verify
 * the data buffer in software (DIF for interleaved metadata, DIX for separate
 * metadata) and log where the mismatch is — or note that none was found.
 * Logging only; the I/O's completion status is handled by the caller.
 */
static void
bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk = {};
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
			       bdev->dif_is_head_of_md, bdev->dif_type,
			       bdev_io->u.bdev.dif_check_flags,
			       bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
	if (rc != 0) {
		SPDK_ERRLOG("Initialization of DIF context failed\n");
		return;
	}

	if (bdev->md_interleave) {
		/* Metadata interleaved with data: verify in place. */
		rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	} else {
		/* Separate metadata buffer: verify data against it (DIX). */
		struct iovec md_iov = {
			.iov_base = bdev_io->u.bdev.md_buf,
			.iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
		};

		rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	}

	if (rc != 0) {
		SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
			    err_blk.err_type, err_blk.err_offset);
	} else {
		SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
	}
}
7364 :
7365 : static void
7366 0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7367 : {
7368 0 : struct nvme_bdev_io *bio = ref;
7369 :
7370 0 : if (spdk_nvme_cpl_is_success(cpl)) {
7371 : /* Run PI verification for read data buffer. */
7372 0 : bdev_nvme_verify_pi_error(bio);
7373 : }
7374 :
7375 : /* Return original completion status */
7376 0 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7377 0 : }
7378 :
/* Read completion. On a PI error, stash the status and retry the read with
 * PI checking disabled (completed by bdev_nvme_no_pi_readv_done, which then
 * verifies PI in software and completes with the saved status). If the retry
 * cannot be issued, complete with the original status right away.
 */
static void
bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	int ret;

	if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
		SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
			    cpl->status.sct, cpl->status.sc);

		/* Save completion status to use after verifying PI error. */
		bio->cpl = *cpl;

		if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
			/* Read without PI checking to verify PI error. */
			ret = bdev_nvme_no_pi_readv(bio,
						    bdev_io->u.bdev.iovs,
						    bdev_io->u.bdev.iovcnt,
						    bdev_io->u.bdev.md_buf,
						    bdev_io->u.bdev.num_blocks,
						    bdev_io->u.bdev.offset_blocks);
			if (ret == 0) {
				/* Completion is deferred to the retry's callback. */
				return;
			}
		}
	}

	bdev_nvme_io_complete_nvme_status(bio, cpl);
}
7409 :
7410 : static void
7411 25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7412 : {
7413 25 : struct nvme_bdev_io *bio = ref;
7414 :
7415 25 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7416 0 : SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
7417 : cpl->status.sct, cpl->status.sc);
7418 : /* Run PI verification for write data buffer if PI error is detected. */
7419 0 : bdev_nvme_verify_pi_error(bio);
7420 : }
7421 :
7422 25 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7423 25 : }
7424 :
/* Zone append completion: publish the assigned LBA (ALBA) and run software PI
 * verification on PI errors before completing.
 */
static void
bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);

	/* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
	 * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
	 */
	/* NOTE(review): this reads 64 bits starting at cdw0, i.e. cdw0 plus the
	 * adjacent dword — assumes the cpl layout places the second result dword
	 * immediately after cdw0 and that the access is suitably aligned; confirm
	 * against the spdk_nvme_cpl definition.
	 */
	bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;

	if (spdk_nvme_cpl_is_pi_error(cpl)) {
		SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
			    cpl->status.sct, cpl->status.sc);
		/* Run PI verification for zone append data buffer if PI error is detected. */
		bdev_nvme_verify_pi_error(bio);
	}

	bdev_nvme_io_complete_nvme_status(bio, cpl);
}
7445 :
7446 : static void
7447 1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7448 : {
7449 1 : struct nvme_bdev_io *bio = ref;
7450 :
7451 1 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7452 0 : SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
7453 : cpl->status.sct, cpl->status.sc);
7454 : /* Run PI verification for compare data buffer if PI error is detected. */
7455 0 : bdev_nvme_verify_pi_error(bio);
7456 : }
7457 :
7458 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7459 1 : }
7460 :
/* Shared completion for both halves of a fused COMPARE+WRITE. Invoked twice:
 * first for the compare (status is stashed), then for the write (the I/O is
 * completed). If the compare failed, its status wins over the write's.
 */
static void
bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;

	/* Compare operation completion */
	if (!bio->first_fused_completed) {
		/* Save compare result for write callback */
		bio->cpl = *cpl;
		bio->first_fused_completed = true;
		return;
	}

	/* Write operation completion */
	if (spdk_nvme_cpl_is_error(&bio->cpl)) {
		/* If bio->cpl is already an error, it means the compare operation failed. In that case,
		 * complete the IO with the compare operation's status.
		 */
		if (!spdk_nvme_cpl_is_error(cpl)) {
			/* The controller should have aborted the write after the
			 * compare half of the fused pair failed.
			 */
			SPDK_ERRLOG("Unexpected write success after compare failure.\n");
		}

		bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
	} else {
		bdev_nvme_io_complete_nvme_status(bio, cpl);
	}
}
7488 :
/* Generic completion: forward the NVMe status straight to the bdev layer. */
static void
bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	bdev_nvme_io_complete_nvme_status((struct nvme_bdev_io *)ref, cpl);
}
7496 :
7497 : static int
7498 0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
7499 : {
7500 0 : switch (desc->zt) {
7501 0 : case SPDK_NVME_ZONE_TYPE_SEQWR:
7502 0 : info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
7503 0 : break;
7504 0 : default:
7505 0 : SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
7506 0 : return -EIO;
7507 : }
7508 :
7509 0 : switch (desc->zs) {
7510 0 : case SPDK_NVME_ZONE_STATE_EMPTY:
7511 0 : info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
7512 0 : break;
7513 0 : case SPDK_NVME_ZONE_STATE_IOPEN:
7514 0 : info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
7515 0 : break;
7516 0 : case SPDK_NVME_ZONE_STATE_EOPEN:
7517 0 : info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
7518 0 : break;
7519 0 : case SPDK_NVME_ZONE_STATE_CLOSED:
7520 0 : info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
7521 0 : break;
7522 0 : case SPDK_NVME_ZONE_STATE_RONLY:
7523 0 : info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
7524 0 : break;
7525 0 : case SPDK_NVME_ZONE_STATE_FULL:
7526 0 : info->state = SPDK_BDEV_ZONE_STATE_FULL;
7527 0 : break;
7528 0 : case SPDK_NVME_ZONE_STATE_OFFLINE:
7529 0 : info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
7530 0 : break;
7531 0 : default:
7532 0 : SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
7533 0 : return -EIO;
7534 : }
7535 :
7536 0 : info->zone_id = desc->zslba;
7537 0 : info->write_pointer = desc->wp;
7538 0 : info->capacity = desc->zcap;
7539 :
7540 0 : return 0;
7541 : }
7542 :
/* Completion of one REPORT ZONES command. Copies descriptors into the caller's
 * spdk_bdev_zone_info array; if more zones were requested than this report
 * held, issues the next report continuing after the last handled zone (this
 * function is its own completion callback). Frees the report buffer on every
 * terminal path.
 */
static void
bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
	uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
	struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
	uint64_t max_zones_per_buf, i;
	uint32_t zone_report_bufsize;
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_qpair *qpair;
	int ret;

	if (spdk_nvme_cpl_is_error(cpl)) {
		goto out_complete_io_nvme_cpl;
	}

	/* The path may have gone away between report commands. */
	if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
		ret = -ENXIO;
		goto out_complete_io_ret;
	}

	ns = bio->io_path->nvme_ns->ns;
	qpair = bio->io_path->qpair->qpair;

	/* Sanity-check the controller's reported zone count against what can
	 * actually fit in the report buffer.
	 */
	zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
	max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
			    sizeof(bio->zone_report_buf->descs[0]);

	if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
		ret = -EINVAL;
		goto out_complete_io_ret;
	}

	if (!bio->zone_report_buf->nr_zones) {
		ret = -EINVAL;
		goto out_complete_io_ret;
	}

	/* Copy out descriptors until the report or the caller's array is exhausted. */
	for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
		ret = fill_zone_from_report(&info[bio->handled_zones],
					    &bio->zone_report_buf->descs[i]);
		if (ret) {
			goto out_complete_io_ret;
		}
		bio->handled_zones++;
	}

	if (bio->handled_zones < zones_to_copy) {
		/* More zones wanted: continue the report starting at the zone
		 * following the last one handled.
		 */
		uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
		uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);

		memset(bio->zone_report_buf, 0, zone_report_bufsize);
		ret = spdk_nvme_zns_report_zones(ns, qpair,
						 bio->zone_report_buf, zone_report_bufsize,
						 slba, SPDK_NVME_ZRA_LIST_ALL, true,
						 bdev_nvme_get_zone_info_done, bio);
		if (!ret) {
			return;
		} else {
			goto out_complete_io_ret;
		}
	}

out_complete_io_nvme_cpl:
	free(bio->zone_report_buf);
	bio->zone_report_buf = NULL;
	bdev_nvme_io_complete_nvme_status(bio, cpl);
	return;

out_complete_io_ret:
	free(bio->zone_report_buf);
	bio->zone_report_buf = NULL;
	bdev_nvme_io_complete(bio, ret);
}
7619 :
/* Zone management (open/close/finish/reset) completion: pass the NVMe status
 * through to the bdev layer.
 */
static void
bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	bdev_nvme_io_complete_nvme_status((struct nvme_bdev_io *)ref, cpl);
}
7627 :
7628 : static void
7629 4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
7630 : {
7631 4 : struct nvme_bdev_io *bio = ctx;
7632 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7633 4 : const struct spdk_nvme_cpl *cpl = &bio->cpl;
7634 :
7635 4 : assert(bdev_nvme_io_type_is_admin(bdev_io->type));
7636 :
7637 4 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
7638 4 : }
7639 :
7640 : static void
7641 3 : bdev_nvme_abort_complete(void *ctx)
7642 : {
7643 3 : struct nvme_bdev_io *bio = ctx;
7644 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7645 :
7646 3 : if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
7647 3 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
7648 : } else {
7649 0 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
7650 : }
7651 3 : }
7652 :
7653 : static void
7654 3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
7655 : {
7656 3 : struct nvme_bdev_io *bio = ref;
7657 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7658 :
7659 3 : bio->cpl = *cpl;
7660 3 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
7661 3 : }
7662 :
7663 : static void
7664 4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
7665 : {
7666 4 : struct nvme_bdev_io *bio = ref;
7667 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7668 :
7669 4 : bio->cpl = *cpl;
7670 4 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
7671 : bdev_nvme_admin_passthru_complete_nvme_status, bio);
7672 4 : }
7673 :
7674 : static void
7675 0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
7676 : {
7677 0 : struct nvme_bdev_io *bio = ref;
7678 : struct iovec *iov;
7679 :
7680 0 : bio->iov_offset = sgl_offset;
7681 0 : for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
7682 0 : iov = &bio->iovs[bio->iovpos];
7683 0 : if (bio->iov_offset < iov->iov_len) {
7684 0 : break;
7685 : }
7686 :
7687 0 : bio->iov_offset -= iov->iov_len;
7688 : }
7689 0 : }
7690 :
7691 : static int
7692 0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
7693 : {
7694 0 : struct nvme_bdev_io *bio = ref;
7695 : struct iovec *iov;
7696 :
7697 0 : assert(bio->iovpos < bio->iovcnt);
7698 :
7699 0 : iov = &bio->iovs[bio->iovpos];
7700 :
7701 0 : *address = iov->iov_base;
7702 0 : *length = iov->iov_len;
7703 :
7704 0 : if (bio->iov_offset) {
7705 0 : assert(bio->iov_offset <= iov->iov_len);
7706 0 : *address += bio->iov_offset;
7707 0 : *length -= bio->iov_offset;
7708 : }
7709 :
7710 0 : bio->iov_offset += *length;
7711 0 : if (bio->iov_offset == iov->iov_len) {
7712 0 : bio->iovpos++;
7713 0 : bio->iov_offset = 0;
7714 : }
7715 :
7716 0 : return 0;
7717 : }
7718 :
7719 : static void
7720 0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
7721 : {
7722 0 : struct nvme_bdev_io *bio = ref;
7723 : struct iovec *iov;
7724 :
7725 0 : bio->fused_iov_offset = sgl_offset;
7726 0 : for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
7727 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
7728 0 : if (bio->fused_iov_offset < iov->iov_len) {
7729 0 : break;
7730 : }
7731 :
7732 0 : bio->fused_iov_offset -= iov->iov_len;
7733 : }
7734 0 : }
7735 :
7736 : static int
7737 0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
7738 : {
7739 0 : struct nvme_bdev_io *bio = ref;
7740 : struct iovec *iov;
7741 :
7742 0 : assert(bio->fused_iovpos < bio->fused_iovcnt);
7743 :
7744 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
7745 :
7746 0 : *address = iov->iov_base;
7747 0 : *length = iov->iov_len;
7748 :
7749 0 : if (bio->fused_iov_offset) {
7750 0 : assert(bio->fused_iov_offset <= iov->iov_len);
7751 0 : *address += bio->fused_iov_offset;
7752 0 : *length -= bio->fused_iov_offset;
7753 : }
7754 :
7755 0 : bio->fused_iov_offset += *length;
7756 0 : if (bio->fused_iov_offset == iov->iov_len) {
7757 0 : bio->fused_iovpos++;
7758 0 : bio->fused_iov_offset = 0;
7759 : }
7760 :
7761 0 : return 0;
7762 : }
7763 :
7764 : static int
7765 0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7766 : void *md, uint64_t lba_count, uint64_t lba)
7767 : {
7768 : int rc;
7769 :
7770 0 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
7771 : lba_count, lba);
7772 :
7773 0 : bio->iovs = iov;
7774 0 : bio->iovcnt = iovcnt;
7775 0 : bio->iovpos = 0;
7776 0 : bio->iov_offset = 0;
7777 :
7778 0 : rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
7779 0 : bio->io_path->qpair->qpair,
7780 : lba, lba_count,
7781 : bdev_nvme_no_pi_readv_done, bio, 0,
7782 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7783 : md, 0, 0);
7784 :
7785 0 : if (rc != 0 && rc != -ENOMEM) {
7786 0 : SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
7787 : }
7788 0 : return rc;
7789 : }
7790 :
/* Submit a read of lba_count blocks at lba on the I/O path recorded in bio.
 *
 * When a memory domain or accel sequence is supplied the extended (_ext)
 * command variants are used, with the options carried in bio->ext_opts;
 * otherwise the *_with_md variants are used. In both cases a single-iovec
 * request takes the contiguous-buffer call and a multi-iovec request goes
 * through the queued SGL callbacks.
 *
 * Returns 0 on successful submission, -ENOMEM when the caller should queue
 * the request for retry, or another negative errno on failure.
 */
static int
bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
		void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
		struct spdk_memory_domain *domain, void *domain_ctx,
		struct spdk_accel_sequence *seq)
{
	struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
	struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
		      lba_count, lba);

	/* Arm the SGL cursor used by the queued_reset_sgl/queued_next_sge callbacks. */
	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	if (domain != NULL || seq != NULL) {
		/* ext_opts must be fully populated before the _ext submission below. */
		bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
		bio->ext_opts.memory_domain = domain;
		bio->ext_opts.memory_domain_ctx = domain_ctx;
		bio->ext_opts.io_flags = flags;
		bio->ext_opts.metadata = md;
		bio->ext_opts.accel_sequence = seq;

		if (iovcnt == 1) {
			rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
						       bio, &bio->ext_opts);
		} else {
			rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
							bdev_nvme_readv_done, bio,
							bdev_nvme_queued_reset_sgl,
							bdev_nvme_queued_next_sge,
							&bio->ext_opts);
		}
	} else if (iovcnt == 1) {
		rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
						   md, lba, lba_count, bdev_nvme_readv_done,
						   bio, flags, 0, 0);
	} else {
		rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
						    bdev_nvme_readv_done, bio, flags,
						    bdev_nvme_queued_reset_sgl,
						    bdev_nvme_queued_next_sge, md, 0, 0);
	}

	if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
		SPDK_ERRLOG("readv failed: rc = %d\n", rc);
	}
	return rc;
}
7843 :
/* Submit a write of lba_count blocks at lba on the I/O path recorded in bio.
 *
 * Mirrors bdev_nvme_readv(): the extended (_ext) variants are used when a
 * memory domain or accel sequence is given, otherwise the *_with_md
 * variants; single-iovec requests use the contiguous-buffer call. cdw12
 * carries the write directive type and cdw13 the directive-specific value
 * (only honored on the _ext path, where they are folded into ext_opts).
 *
 * Returns 0 on successful submission, -ENOMEM when the caller should queue
 * the request for retry, or another negative errno on failure.
 */
static int
bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
		 void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
		 struct spdk_memory_domain *domain, void *domain_ctx,
		 struct spdk_accel_sequence *seq,
		 union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13)
{
	struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
	struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
		      lba_count, lba);

	/* Arm the SGL cursor used by the queued_reset_sgl/queued_next_sge callbacks. */
	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	if (domain != NULL || seq != NULL) {
		/* ext_opts must be fully populated before the _ext submission below. */
		bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
		bio->ext_opts.memory_domain = domain;
		bio->ext_opts.memory_domain_ctx = domain_ctx;
		/* Fold the write directive type into the I/O flags. */
		bio->ext_opts.io_flags = flags | SPDK_NVME_IO_FLAGS_DIRECTIVE(cdw12.write.dtype);
		bio->ext_opts.cdw13 = cdw13.raw;
		bio->ext_opts.metadata = md;
		bio->ext_opts.accel_sequence = seq;

		if (iovcnt == 1) {
			rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
							bio, &bio->ext_opts);
		} else {
			rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
							 bdev_nvme_writev_done, bio,
							 bdev_nvme_queued_reset_sgl,
							 bdev_nvme_queued_next_sge,
							 &bio->ext_opts);
		}
	} else if (iovcnt == 1) {
		rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
						    md, lba, lba_count, bdev_nvme_writev_done,
						    bio, flags, 0, 0);
	} else {
		rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
						     bdev_nvme_writev_done, bio, flags,
						     bdev_nvme_queued_reset_sgl,
						     bdev_nvme_queued_next_sge, md, 0, 0);
	}

	if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
		SPDK_ERRLOG("writev failed: rc = %d\n", rc);
	}
	return rc;
}
7898 :
/* Submit a zone append of lba_count blocks to the zone starting at zslba.
 * Single-iovec requests use the contiguous-buffer variant; multi-iovec
 * requests go through the queued SGL callbacks.
 *
 * Returns 0 on successful submission, -ENOMEM when the caller should queue
 * the request for retry, or another negative errno on failure.
 */
static int
bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
		       void *md, uint64_t lba_count, uint64_t zslba,
		       uint32_t flags)
{
	struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
	struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
		      lba_count, zslba);

	/* Arm the SGL cursor used by the queued_reset_sgl/queued_next_sge callbacks. */
	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	if (iovcnt == 1) {
		rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
						       lba_count,
						       bdev_nvme_zone_appendv_done, bio,
						       flags,
						       0, 0);
	} else {
		rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
							bdev_nvme_zone_appendv_done, bio, flags,
							bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
							md, 0, 0);
	}

	if (rc != 0 && rc != -ENOMEM) {
		SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
	}
	return rc;
}
7934 :
7935 : static int
7936 1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7937 : void *md, uint64_t lba_count, uint64_t lba,
7938 : uint32_t flags)
7939 : {
7940 : int rc;
7941 :
7942 1 : SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7943 : lba_count, lba);
7944 :
7945 1 : bio->iovs = iov;
7946 1 : bio->iovcnt = iovcnt;
7947 1 : bio->iovpos = 0;
7948 1 : bio->iov_offset = 0;
7949 :
7950 1 : rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
7951 1 : bio->io_path->qpair->qpair,
7952 : lba, lba_count,
7953 : bdev_nvme_comparev_done, bio, flags,
7954 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7955 : md, 0, 0);
7956 :
7957 1 : if (rc != 0 && rc != -ENOMEM) {
7958 0 : SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
7959 : }
7960 1 : return rc;
7961 : }
7962 :
/* Submit a fused compare-and-write: a compare with FUSE_FIRST immediately
 * followed by a write with FUSE_SECOND on the same qpair. The compare reads
 * from cmp_iov and the write takes its payload from write_iov via the
 * dedicated fused SGL callbacks.
 *
 * On a retry (bdev_io->num_retries != 0) a compare that was already
 * submitted successfully is not re-submitted; only the write half is
 * retried, tracked by bio->first_fused_submitted.
 *
 * Returns 0 on successful submission of both halves, -ENOMEM when the write
 * half should be retried, or another negative errno if the compare half
 * failed to submit.
 */
static int
bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
			      struct iovec *write_iov, int write_iovcnt,
			      void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
{
	struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
	struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
		      lba_count, lba);

	/* Arm both SGL cursors: iovs for the compare, fused_iovs for the write. */
	bio->iovs = cmp_iov;
	bio->iovcnt = cmp_iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;
	bio->fused_iovs = write_iov;
	bio->fused_iovcnt = write_iovcnt;
	bio->fused_iovpos = 0;
	bio->fused_iov_offset = 0;

	if (bdev_io->num_retries == 0) {
		/* Fresh submission - reset the fused-command progress state. */
		bio->first_fused_submitted = false;
		bio->first_fused_completed = false;
	}

	if (!bio->first_fused_submitted) {
		flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
		memset(&bio->cpl, 0, sizeof(bio->cpl));

		rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
						       bdev_nvme_comparev_and_writev_done, bio, flags,
						       bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
		if (rc == 0) {
			bio->first_fused_submitted = true;
			flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
		} else {
			if (rc != -ENOMEM) {
				SPDK_ERRLOG("compare failed: rc = %d\n", rc);
			}
			return rc;
		}
	}

	flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;

	rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
					     bdev_nvme_comparev_and_writev_done, bio, flags,
					     bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
	if (rc != 0 && rc != -ENOMEM) {
		SPDK_ERRLOG("write failed: rc = %d\n", rc);
		/* The compare half is already in flight, so the request cannot be
		 * failed here; clear rc and let the completion callback resolve it.
		 */
		rc = 0;
	}

	return rc;
}
8020 :
8021 : static int
8022 1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
8023 : {
8024 1 : struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
8025 : struct spdk_nvme_dsm_range *range;
8026 : uint64_t offset, remaining;
8027 : uint64_t num_ranges_u64;
8028 : uint16_t num_ranges;
8029 : int rc;
8030 :
8031 1 : num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
8032 : SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8033 1 : if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
8034 0 : SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
8035 0 : return -EINVAL;
8036 : }
8037 1 : num_ranges = (uint16_t)num_ranges_u64;
8038 :
8039 1 : offset = offset_blocks;
8040 1 : remaining = num_blocks;
8041 1 : range = &dsm_ranges[0];
8042 :
8043 : /* Fill max-size ranges until the remaining blocks fit into one range */
8044 1 : while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
8045 0 : range->attributes.raw = 0;
8046 0 : range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8047 0 : range->starting_lba = offset;
8048 :
8049 0 : offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8050 0 : remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8051 0 : range++;
8052 : }
8053 :
8054 : /* Final range describes the remaining blocks */
8055 1 : range->attributes.raw = 0;
8056 1 : range->length = remaining;
8057 1 : range->starting_lba = offset;
8058 :
8059 1 : rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
8060 1 : bio->io_path->qpair->qpair,
8061 : SPDK_NVME_DSM_ATTR_DEALLOCATE,
8062 : dsm_ranges, num_ranges,
8063 : bdev_nvme_queued_done, bio);
8064 :
8065 1 : return rc;
8066 : }
8067 :
8068 : static int
8069 0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
8070 : {
8071 0 : if (num_blocks > UINT16_MAX + 1) {
8072 0 : SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
8073 0 : return -EINVAL;
8074 : }
8075 :
8076 0 : return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
8077 0 : bio->io_path->qpair->qpair,
8078 : offset_blocks, num_blocks,
8079 : bdev_nvme_queued_done, bio,
8080 : 0);
8081 : }
8082 :
/* Start a ZNS Report Zones command to gather info for num_zones zones
 * beginning at zone_id. Allocates a report buffer sized to the namespace's
 * max transfer size; the completion callback (bdev_nvme_get_zone_info_done)
 * owns filling *info and freeing the buffer.
 *
 * Returns 0 on submission, -EINVAL for a misaligned zone_id or bad zone
 * count, -ENOMEM if the report buffer cannot be allocated.
 */
static int
bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
			struct spdk_bdev_zone_info *info)
{
	struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
	struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
	uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
	uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
	uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);

	/* zone_id must be a zone start LBA. */
	if (zone_id % zone_size != 0) {
		return -EINVAL;
	}

	if (num_zones > total_zones || !num_zones) {
		return -EINVAL;
	}

	/* Only one report may be outstanding per bio. */
	assert(!bio->zone_report_buf);
	bio->zone_report_buf = calloc(1, zone_report_bufsize);
	if (!bio->zone_report_buf) {
		return -ENOMEM;
	}

	bio->handled_zones = 0;

	return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
					  zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
					  bdev_nvme_get_zone_info_done, bio);
}
8113 :
8114 : static int
8115 0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
8116 : enum spdk_bdev_zone_action action)
8117 : {
8118 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8119 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8120 :
8121 0 : switch (action) {
8122 0 : case SPDK_BDEV_ZONE_CLOSE:
8123 0 : return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
8124 : bdev_nvme_zone_management_done, bio);
8125 0 : case SPDK_BDEV_ZONE_FINISH:
8126 0 : return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
8127 : bdev_nvme_zone_management_done, bio);
8128 0 : case SPDK_BDEV_ZONE_OPEN:
8129 0 : return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
8130 : bdev_nvme_zone_management_done, bio);
8131 0 : case SPDK_BDEV_ZONE_RESET:
8132 0 : return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
8133 : bdev_nvme_zone_management_done, bio);
8134 0 : case SPDK_BDEV_ZONE_OFFLINE:
8135 0 : return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
8136 : bdev_nvme_zone_management_done, bio);
8137 0 : default:
8138 0 : return -EINVAL;
8139 : }
8140 : }
8141 :
/* Submit an admin passthrough command on the first available controller in
 * the channel's I/O path list. Completes the bio immediately with an error
 * if no controller is available (-ENXIO), the transfer exceeds the
 * controller's MDTS (-EINVAL), or submission fails on every candidate.
 */
static void
bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
			 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
{
	struct nvme_io_path *io_path;
	struct nvme_ctrlr *nvme_ctrlr;
	uint32_t max_xfer_size;
	int rc = -ENXIO;

	/* Choose the first ctrlr which is not failed. */
	STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
		nvme_ctrlr = io_path->qpair->ctrlr;

		/* We should skip any unavailable nvme_ctrlr rather than checking
		 * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
		 */
		if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
			continue;
		}

		max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);

		if (nbytes > max_xfer_size) {
			SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
			rc = -EINVAL;
			goto err;
		}

		rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
						   bdev_nvme_admin_passthru_done, bio);
		if (rc == 0) {
			/* Submitted; completion is reported via the callback. */
			return;
		}
		/* Submission failed on this ctrlr - fall through to the next one. */
	}

	/* Reached either via goto on an MDTS violation or by exhausting the
	 * io_path list without a successful submission.
	 */
err:
	bdev_nvme_admin_complete(bio, rc);
}
8180 :
8181 : static int
8182 0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
8183 : void *buf, size_t nbytes)
8184 : {
8185 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8186 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8187 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8188 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8189 :
8190 0 : if (nbytes > max_xfer_size) {
8191 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8192 0 : return -EINVAL;
8193 : }
8194 :
8195 : /*
8196 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
8197 : * so fill it out automatically.
8198 : */
8199 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8200 :
8201 0 : return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
8202 : (uint32_t)nbytes, bdev_nvme_queued_done, bio);
8203 : }
8204 :
/* Submit a raw NVMe I/O command with a separate metadata buffer on bio's
 * path. Validates that nbytes fits in the namespace's max transfer size and
 * that md_len matches the per-sector metadata size implied by nbytes.
 *
 * Returns -EINVAL on validation failure, otherwise the driver's submission
 * result.
 */
static int
bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
			 void *buf, size_t nbytes, void *md_buf, size_t md_len)
{
	struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
	struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
	/* nbytes is in extended-LBA units, so this yields the sector count. */
	size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
	uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
	struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
		SPDK_ERRLOG("invalid meta data buffer size\n");
		return -EINVAL;
	}

	/*
	 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
	 * so fill it out automatically.
	 */
	cmd->nsid = spdk_nvme_ns_get_id(ns);

	return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
			(uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
}
8234 :
/* Submit a raw NVMe I/O command whose payload is described by an iovec list
 * (walked via the queued SGL callbacks) plus an optional metadata buffer.
 * Validates nbytes against the namespace's max transfer size and md_len
 * against the per-sector metadata size implied by nbytes.
 *
 * Returns -EINVAL on validation failure, otherwise the driver's submission
 * result.
 */
static int
bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
			  struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
			  size_t nbytes, void *md_buf, size_t md_len)
{
	struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
	struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
	/* nbytes is in extended-LBA units, so this yields the sector count. */
	size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
	uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
	struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);

	/* Arm the SGL cursor used by the queued_reset_sgl/queued_next_sge callbacks. */
	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
		SPDK_ERRLOG("invalid meta data buffer size\n");
		return -EINVAL;
	}

	/*
	 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
	 * require a nsid, so fill it out automatically.
	 */
	cmd->nsid = spdk_nvme_ns_get_id(ns);

	return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
		       ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
		       bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
}
8271 :
/* Abort bio_to_abort. First try to cancel it from the channel's retry queue
 * (no NVMe command needed). Otherwise issue an NVMe Abort: on the I/O path
 * the target I/O was submitted on if known, else probe every path in the
 * channel until one recognizes the request. On any remaining error the
 * abort request itself is completed with failure.
 */
static void
bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
		struct nvme_bdev_io *bio_to_abort)
{
	struct nvme_io_path *io_path;
	int rc = 0;

	rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
	if (rc == 0) {
		/* Target I/O was still queued for retry - aborted without an NVMe command. */
		bdev_nvme_admin_complete(bio, 0);
		return;
	}

	io_path = bio_to_abort->io_path;
	if (io_path != NULL) {
		/* The target I/O's submission path is known - abort it there. */
		rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
						   io_path->qpair->qpair,
						   bio_to_abort,
						   bdev_nvme_abort_done, bio);
	} else {
		/* Unknown path (e.g. admin command) - try each path; -ENOENT means
		 * that controller did not own the command, so keep looking.
		 */
		STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
			rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
							   NULL,
							   bio_to_abort,
							   bdev_nvme_abort_done, bio);

			if (rc != -ENOENT) {
				break;
			}
		}
	}

	if (rc != 0) {
		/* If no command was found or there was any error, complete the abort
		 * request with failure.
		 */
		bdev_nvme_admin_complete(bio, rc);
	}
}
8311 :
8312 : static int
8313 0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
8314 : uint64_t num_blocks)
8315 : {
8316 0 : struct spdk_nvme_scc_source_range range = {
8317 : .slba = src_offset_blocks,
8318 0 : .nlb = num_blocks - 1
8319 : };
8320 :
8321 0 : return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
8322 0 : bio->io_path->qpair->qpair,
8323 : &range, 1, dst_offset_blocks,
8324 : bdev_nvme_queued_done, bio);
8325 : }
8326 :
/* Emit a "bdev_nvme_set_options" RPC object reproducing the current global
 * options (g_opts), so that saved configuration restores them on load.
 * The key order here is part of the emitted config format - keep it stable.
 */
static void
bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
{
	const char *action;
	uint32_t i;

	if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
		action = "reset";
	} else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
		action = "abort";
	} else {
		action = "none";
	}

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "action_on_timeout", action);
	spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
	spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
	spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
	spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
	spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
	spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
	spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
	spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
	spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
	spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
	spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
	spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
	spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
	spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
	spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
	spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
	spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
	spdk_json_write_named_bool(w, "disable_auto_failback", g_opts.disable_auto_failback);
	spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
	spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
	spdk_json_write_named_bool(w, "nvme_error_stat", g_opts.nvme_error_stat);
	spdk_json_write_named_uint32(w, "rdma_srq_size", g_opts.rdma_srq_size);
	spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
	spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
	spdk_json_write_named_uint32(w, "rdma_max_cq_size", g_opts.rdma_max_cq_size);
	spdk_json_write_named_uint16(w, "rdma_cm_event_timeout_ms", g_opts.rdma_cm_event_timeout_ms);
	/* dhchap_digests/dhchap_dhgroups are bitmasks; emit each set bit by name. */
	spdk_json_write_named_array_begin(w, "dhchap_digests");
	for (i = 0; i < 32; ++i) {
		if (g_opts.dhchap_digests & SPDK_BIT(i)) {
			spdk_json_write_string(w, spdk_nvme_dhchap_get_digest_name(i));
		}
	}
	spdk_json_write_array_end(w);
	spdk_json_write_named_array_begin(w, "dhchap_dhgroups");
	for (i = 0; i < 32; ++i) {
		if (g_opts.dhchap_dhgroups & SPDK_BIT(i)) {
			spdk_json_write_string(w, spdk_nvme_dhchap_get_dhgroup_name(i));
		}
	}

	spdk_json_write_array_end(w);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
8392 :
/* Emit a "bdev_nvme_start_discovery" RPC object reproducing one discovery
 * service context so saved configuration restarts it on load.
 */
static void
bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
{
	struct spdk_nvme_transport_id trid;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", ctx->name);
	spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);

	/* Emit the trid with subnqn cleared - the discovery service fills the
	 * subsystem NQN itself when reconnecting.
	 */
	trid = ctx->trid;
	memset(trid.subnqn, 0, sizeof(trid.subnqn));
	nvme_bdev_dump_trid_json(&trid, w);

	spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
	spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
	spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
	spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
				     ctx->bdev_opts.fast_io_fail_timeout_sec);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
8419 :
#ifdef SPDK_CONFIG_NVME_CUSE
/* Emit a "bdev_nvme_cuse_register" RPC object for a controller that has a
 * CUSE device registered; silently emits nothing when it does not.
 */
static void
nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
			    struct nvme_ctrlr *nvme_ctrlr)
{
	size_t cuse_name_size = 128;
	char cuse_name[cuse_name_size];

	/* Failure here means no CUSE device is registered for this ctrlr. */
	if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
					  cuse_name, &cuse_name_size) != 0) {
		return;
	}

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
#endif
8444 :
/* Emit a "bdev_nvme_attach_controller" RPC object reproducing one attached
 * controller's transport id, failover timeouts, PI check flags and host
 * options. Controllers that came from a discovery service are skipped -
 * their discovery RPC recreates them.
 */
static void
nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
		       struct nvme_ctrlr *nvme_ctrlr)
{
	struct spdk_nvme_transport_id *trid;
	const struct spdk_nvme_ctrlr_opts *opts;

	if (nvme_ctrlr->opts.from_discovery_service) {
		/* Do not emit an RPC for this - it will be implicitly
		 * covered by a separate bdev_nvme_start_discovery or
		 * bdev_nvme_start_mdns_discovery RPC.
		 */
		return;
	}

	trid = &nvme_ctrlr->active_path_id->trid;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
	nvme_bdev_dump_trid_json(trid, w);
	spdk_json_write_named_bool(w, "prchk_reftag",
				   (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
	spdk_json_write_named_bool(w, "prchk_guard",
				   (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
	spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
	spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
	spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
				     nvme_ctrlr->opts.fast_io_fail_timeout_sec);
	/* Prefer the keyring key's name; fall back to a legacy inline PSK path. */
	if (nvme_ctrlr->psk != NULL) {
		spdk_json_write_named_string(w, "psk", spdk_key_get_name(nvme_ctrlr->psk));
	} else if (nvme_ctrlr->opts.psk[0] != '\0') {
		spdk_json_write_named_string(w, "psk", nvme_ctrlr->opts.psk);
	}

	opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
	spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
	spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
	spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
	/* Source address/service id are only emitted when explicitly set. */
	if (opts->src_addr[0] != '\0') {
		spdk_json_write_named_string(w, "hostaddr", opts->src_addr);
	}
	if (opts->src_svcid[0] != '\0') {
		spdk_json_write_named_string(w, "hostsvcid", opts->src_svcid);
	}

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
8498 :
/* Emit a "bdev_nvme_set_hotplug" RPC object reproducing the current hotplug
 * poller settings (period and enabled flag).
 */
static void
bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
{
	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
	spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
8512 :
/* bdev module config_json hook: dump the full bdev_nvme runtime state as a
 * sequence of RPC objects (global options, attached controllers, discovery
 * services, and finally the hotplug settings). Always returns 0.
 */
static int
bdev_nvme_config_json(struct spdk_json_write_ctx *w)
{
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *nvme_ctrlr;
	struct discovery_ctx *ctx;

	bdev_nvme_opts_config_json(w);

	/* Hold the mutex while walking the global ctrlr and discovery lists. */
	pthread_mutex_lock(&g_bdev_nvme_mutex);

	TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
		TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
			nvme_ctrlr_config_json(w, nvme_ctrlr);

#ifdef SPDK_CONFIG_NVME_CUSE
			nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
#endif
		}
	}

	/* mDNS-spawned discovery contexts are covered by the mdns dump below. */
	TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
		if (!ctx->from_mdns_discovery_service) {
			bdev_nvme_discovery_config_json(w, ctx);
		}
	}

	bdev_nvme_mdns_discovery_config_json(w);

	/* Dump as last parameter to give all NVMe bdevs chance to be constructed
	 * before enabling hotplug poller.
	 */
	bdev_nvme_hotplug_config_json(w);

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
	return 0;
}
8550 :
8551 : struct spdk_nvme_ctrlr *
8552 1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
8553 : {
8554 : struct nvme_bdev *nbdev;
8555 : struct nvme_ns *nvme_ns;
8556 :
8557 1 : if (!bdev || bdev->module != &nvme_if) {
8558 0 : return NULL;
8559 : }
8560 :
8561 1 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
8562 1 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
8563 1 : assert(nvme_ns != NULL);
8564 :
8565 1 : return nvme_ns->ctrlr->ctrlr;
8566 : }
8567 :
8568 : static bool
8569 12 : nvme_io_path_is_current(struct nvme_io_path *io_path)
8570 : {
8571 : const struct nvme_bdev_channel *nbdev_ch;
8572 : bool current;
8573 :
8574 12 : if (!nvme_io_path_is_available(io_path)) {
8575 4 : return false;
8576 : }
8577 :
8578 8 : nbdev_ch = io_path->nbdev_ch;
8579 8 : if (nbdev_ch == NULL) {
8580 1 : current = false;
8581 7 : } else if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
8582 3 : struct nvme_io_path *optimized_io_path = NULL;
8583 :
8584 6 : STAILQ_FOREACH(optimized_io_path, &nbdev_ch->io_path_list, stailq) {
8585 5 : if (optimized_io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) {
8586 2 : break;
8587 : }
8588 : }
8589 :
8590 : /* A non-optimized path is only current if there are no optimized paths. */
8591 3 : current = (io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) ||
8592 : (optimized_io_path == NULL);
8593 : } else {
8594 4 : if (nbdev_ch->current_io_path) {
8595 1 : current = (io_path == nbdev_ch->current_io_path);
8596 : } else {
8597 : struct nvme_io_path *first_path;
8598 :
8599 : /* We arrived here as there are no optimized paths for active-passive
8600 : * mode. Check if this io_path is the first one available on the list.
8601 : */
8602 3 : current = false;
8603 3 : STAILQ_FOREACH(first_path, &nbdev_ch->io_path_list, stailq) {
8604 3 : if (nvme_io_path_is_available(first_path)) {
8605 3 : current = (io_path == first_path);
8606 3 : break;
8607 : }
8608 : }
8609 : }
8610 : }
8611 :
8612 8 : return current;
8613 : }
8614 :
8615 : void
8616 0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
8617 : {
8618 0 : struct nvme_ns *nvme_ns = io_path->nvme_ns;
8619 0 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
8620 : const struct spdk_nvme_ctrlr_data *cdata;
8621 : const struct spdk_nvme_transport_id *trid;
8622 : const char *adrfam_str;
8623 :
8624 0 : spdk_json_write_object_begin(w);
8625 :
8626 0 : spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
8627 :
8628 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
8629 0 : trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
8630 :
8631 0 : spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
8632 0 : spdk_json_write_named_bool(w, "current", nvme_io_path_is_current(io_path));
8633 0 : spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
8634 0 : spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
8635 :
8636 0 : spdk_json_write_named_object_begin(w, "transport");
8637 0 : spdk_json_write_named_string(w, "trtype", trid->trstring);
8638 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
8639 0 : if (trid->trsvcid[0] != '\0') {
8640 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
8641 : }
8642 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
8643 0 : if (adrfam_str) {
8644 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
8645 : }
8646 0 : spdk_json_write_object_end(w);
8647 :
8648 0 : spdk_json_write_object_end(w);
8649 0 : }
8650 :
8651 : void
8652 0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
8653 : {
8654 : struct discovery_ctx *ctx;
8655 : struct discovery_entry_ctx *entry_ctx;
8656 :
8657 0 : spdk_json_write_array_begin(w);
8658 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
8659 0 : spdk_json_write_object_begin(w);
8660 0 : spdk_json_write_named_string(w, "name", ctx->name);
8661 :
8662 0 : spdk_json_write_named_object_begin(w, "trid");
8663 0 : nvme_bdev_dump_trid_json(&ctx->trid, w);
8664 0 : spdk_json_write_object_end(w);
8665 :
8666 0 : spdk_json_write_named_array_begin(w, "referrals");
8667 0 : TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
8668 0 : spdk_json_write_object_begin(w);
8669 0 : spdk_json_write_named_object_begin(w, "trid");
8670 0 : nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
8671 0 : spdk_json_write_object_end(w);
8672 0 : spdk_json_write_object_end(w);
8673 : }
8674 0 : spdk_json_write_array_end(w);
8675 :
8676 0 : spdk_json_write_object_end(w);
8677 : }
8678 0 : spdk_json_write_array_end(w);
8679 0 : }
8680 :
8681 1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
8682 :
/* Register the bdev_nvme tracepoints: an I/O object tagged 'N', its
 * start/done tracepoints, and relations linking the lower-level NVMe
 * PCIe/TCP submit/complete tracepoints to the same I/O object so traces
 * can be correlated across layers.
 */
SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)
{
	struct spdk_trace_tpoint_opts opts[] = {
		{
			/* IO_START is the object's origin tracepoint (new_object = 1). */
			"BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
			OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 1,
			{{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
		},
		{
			"BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
			OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 0,
			{{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
		}
	};


	spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
	spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
	spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
	spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
	spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
}
|