Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation. All rights reserved.
3 : * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4 : * Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : * Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
6 : */
7 :
8 : #include "spdk/stdinc.h"
9 :
10 : #include "bdev_nvme.h"
11 :
12 : #include "spdk/accel.h"
13 : #include "spdk/config.h"
14 : #include "spdk/endian.h"
15 : #include "spdk/bdev.h"
16 : #include "spdk/json.h"
17 : #include "spdk/keyring.h"
18 : #include "spdk/likely.h"
19 : #include "spdk/nvme.h"
20 : #include "spdk/nvme_ocssd.h"
21 : #include "spdk/nvme_zns.h"
22 : #include "spdk/opal.h"
23 : #include "spdk/thread.h"
24 : #include "spdk/trace.h"
25 : #include "spdk/string.h"
26 : #include "spdk/util.h"
27 : #include "spdk/uuid.h"
28 :
29 : #include "spdk/bdev_module.h"
30 : #include "spdk/log.h"
31 :
32 : #include "spdk_internal/usdt.h"
33 : #include "spdk_internal/trace_defs.h"
34 :
/* String used to identify a controller in log prefixes: the subsystem NQN when the
 * active path uses a fabrics transport, otherwise the transport address.
 * The argument is parenthesized so that any pointer expression may be passed.
 */
#define CTRLR_STRING(nvme_ctrlr) \
	(spdk_nvme_trtype_is_fabrics((nvme_ctrlr)->active_path_id->trid.trtype) ? \
	(nvme_ctrlr)->active_path_id->trid.subnqn : (nvme_ctrlr)->active_path_id->trid.traddr)

/* Controller ID reported by the NVMe driver for the underlying spdk_nvme_ctrlr. */
#define CTRLR_ID(nvme_ctrlr)	(spdk_nvme_ctrlr_get_id((nvme_ctrlr)->ctrlr))
40 :
/* Controller-scoped logging helpers. Each one prefixes the message with
 * "[<CTRLR_STRING>, <CTRLR_ID>] " and forwards to the matching SPDK log macro.
 *
 * The expansions deliberately do not end in a semicolon: the caller supplies it,
 * which keeps the macros usable as the sole statement of an unbraced if/else and
 * consistent with the no-op form of NVME_CTRLR_DEBUGLOG below.
 */
#define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \
	SPDK_ERRLOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__)

#define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \
	SPDK_WARNLOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__)

#define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \
	SPDK_NOTICELOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__)

#define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \
	SPDK_INFOLOG(bdev_nvme, "[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__)

/* Debug logging is compiled out entirely unless DEBUG is defined. */
#ifdef DEBUG
#define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \
	SPDK_DEBUGLOG(bdev_nvme, "[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__)
#else
#define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0)
#endif
59 :
/* Name of the bdev exposed by an nvme_bdev, used as the log prefix below.
 * The argument is parenthesized so that any pointer expression may be passed.
 */
#define BDEV_STRING(nbdev) ((nbdev)->disk.name)

/* Bdev-scoped logging helpers. Each one prefixes the message with "[<bdev name>] "
 * and forwards to the matching SPDK log macro. As with the controller-scoped
 * helpers, the caller supplies the terminating semicolon.
 */
#define NVME_BDEV_ERRLOG(nbdev, format, ...) \
	SPDK_ERRLOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__)

#define NVME_BDEV_WARNLOG(nbdev, format, ...) \
	SPDK_WARNLOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__)

#define NVME_BDEV_NOTICELOG(nbdev, format, ...) \
	SPDK_NOTICELOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__)

#define NVME_BDEV_INFOLOG(nbdev, format, ...) \
	SPDK_INFOLOG(bdev_nvme, "[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__)
73 :
74 : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true /* default for g_opts.delay_cmd_submit */
75 : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000) /* default for g_opts.keep_alive_timeout_ms */
76 :
77 : #define NSID_STR_LEN 10 /* NOTE(review): presumably sized for a decimal 32-bit NSID - confirm at the use site */
78 :
79 : #define SPDK_CONTROLLER_NAME_MAX 512 /* NOTE(review): presumably a buffer size for controller names - confirm at the use site */
80 :
81 : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
82 :
83 : struct nvme_bdev_io {
84 : /** Array of iovecs to transfer. */
85 : struct iovec *iovs;
86 :
87 : /** Number of iovecs in iovs array. */
88 : int iovcnt;
89 :
90 : /** Current iovec position. */
91 : int iovpos;
92 :
93 : /** Offset in current iovec. */
94 : uint32_t iov_offset;
95 :
96 : /** Offset in current fused iovec (counterpart of iov_offset for fused_iovs). */
97 : uint32_t fused_iov_offset;
98 :
99 : /** Array of iovecs to transfer for the fused command (NOTE(review): presumably the second half of a compare-and-write - confirm). */
100 : struct iovec *fused_iovs;
101 :
102 : /** Number of iovecs in fused_iovs array. */
103 : int fused_iovcnt;
104 :
105 : /** Current position in the fused_iovs array. */
106 : int fused_iovpos;
107 :
108 : /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
109 : * being reset in a reset I/O.
110 : */
111 : struct nvme_io_path *io_path;
112 :
113 : /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
114 : struct spdk_nvme_cpl cpl;
115 :
116 : /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
117 : struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
118 :
119 : /** Keeps track if first of fused commands was submitted */
120 : bool first_fused_submitted;
121 :
122 : /** Keeps track if first of fused commands was completed */
123 : bool first_fused_completed;
124 :
125 : /** How many times the current I/O was retried. */
126 : int32_t retry_count;
127 :
128 : /** Expiration value in ticks to retry the current I/O. */
129 : uint64_t retry_ticks;
130 :
131 : /** Temporary pointer to zone report buffer */
132 : struct spdk_nvme_zns_zone_report *zone_report_buf;
133 :
134 : /** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
135 : uint64_t handled_zones;
136 :
137 : /** Current tsc at submit time. */
138 : uint64_t submit_tsc;
139 :
140 : /** Link used to queue this nvme_bdev_io on a list (e.g. a channel's retry_io_list). */
141 : TAILQ_ENTRY(nvme_bdev_io) retry_link;
142 : };
143 :
144 : struct nvme_probe_skip_entry {
145 : struct spdk_nvme_transport_id trid; /* transport ID of the controller to skip */
146 : TAILQ_ENTRY(nvme_probe_skip_entry) tailq; /* link in g_skipped_nvme_ctrlrs */
147 : };
148 : /* All the controllers deleted by users via RPC are skipped by the hotplug monitor. */
149 : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
150 : g_skipped_nvme_ctrlrs);
151 :
152 : #define BDEV_NVME_DEFAULT_DIGESTS (SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256) | \
153 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA384) | \
154 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512)) /* bitmask used as the default for g_opts.dhchap_digests */
155 :
156 : #define BDEV_NVME_DEFAULT_DHGROUPS (SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL) | \
157 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_2048) | \
158 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_3072) | \
159 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_4096) | \
160 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_6144) | \
161 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192)) /* bitmask used as the default for g_opts.dhchap_dhgroups */
162 :
163 : static struct spdk_bdev_nvme_opts g_opts = { /* module-wide options, initialized to their defaults */
164 : .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
165 : .timeout_us = 0,
166 : .timeout_admin_us = 0,
167 : .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
168 : .transport_retry_count = 4,
169 : .arbitration_burst = 0,
170 : .low_priority_weight = 0,
171 : .medium_priority_weight = 0,
172 : .high_priority_weight = 0,
173 : .nvme_adminq_poll_period_us = 10000ULL, /* 10 ms */
174 : .nvme_ioq_poll_period_us = 0,
175 : .io_queue_requests = 0,
176 : .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
177 : .bdev_retry_count = 3,
178 : .transport_ack_timeout = 0,
179 : .ctrlr_loss_timeout_sec = 0,
180 : .reconnect_delay_sec = 0,
181 : .fast_io_fail_timeout_sec = 0,
182 : .disable_auto_failback = false,
183 : .generate_uuids = false,
184 : .transport_tos = 0,
185 : .nvme_error_stat = false,
186 : .io_path_stat = false, /* when true, nvme_io_path_alloc() also allocates per-path I/O statistics */
187 : .allow_accel_sequence = false,
188 : .dhchap_digests = BDEV_NVME_DEFAULT_DIGESTS,
189 : .dhchap_dhgroups = BDEV_NVME_DEFAULT_DHGROUPS,
190 : };
191 :
192 : #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL /* NOTE(review): presumably microseconds, like g_nvme_hotplug_poll_period_us - confirm */
193 : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL /* initial value of g_nvme_hotplug_poll_period_us */
194 :
195 : static int g_hot_insert_nvme_controller_index = 0;
196 : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
197 : static bool g_nvme_hotplug_enabled = false;
198 : struct spdk_thread *g_bdev_nvme_init_thread; /* not static: visible to other translation units */
199 : static struct spdk_poller *g_hotplug_poller;
200 : static struct spdk_poller *g_hotplug_probe_poller;
201 : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
202 :
203 : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
204 : struct nvme_async_probe_ctx *ctx);
205 : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
206 : struct nvme_async_probe_ctx *ctx);
207 : static int bdev_nvme_library_init(void);
208 : static void bdev_nvme_library_fini(void);
209 : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
210 : struct spdk_bdev_io *bdev_io);
211 : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
212 : struct spdk_bdev_io *bdev_io);
213 : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
214 : void *md, uint64_t lba_count, uint64_t lba,
215 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
216 : struct spdk_accel_sequence *seq);
217 : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
218 : void *md, uint64_t lba_count, uint64_t lba);
219 : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
220 : void *md, uint64_t lba_count, uint64_t lba,
221 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
222 : struct spdk_accel_sequence *seq,
223 : union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13);
224 : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
225 : void *md, uint64_t lba_count,
226 : uint64_t zslba, uint32_t flags);
227 : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
228 : void *md, uint64_t lba_count, uint64_t lba,
229 : uint32_t flags);
230 : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
231 : struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
232 : int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
233 : uint32_t flags);
234 : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
235 : uint32_t num_zones, struct spdk_bdev_zone_info *info);
236 : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
237 : enum spdk_bdev_zone_action action);
238 : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
239 : struct nvme_bdev_io *bio,
240 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
241 : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
242 : void *buf, size_t nbytes);
243 : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
244 : void *buf, size_t nbytes, void *md_buf, size_t md_len);
245 : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
246 : struct iovec *iov, int iovcnt, size_t nbytes,
247 : void *md_buf, size_t md_len);
248 : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
249 : struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
250 : static void bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio);
251 : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
252 : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
253 : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
254 : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
255 :
256 : static struct nvme_ns *nvme_ns_alloc(void);
257 : static void nvme_ns_free(struct nvme_ns *ns);
258 :
259 : static int
260 175 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
261 : {
262 175 : return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
263 : }
264 :
265 935 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
266 :
267 : struct spdk_nvme_qpair *
268 1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
269 : {
270 : struct nvme_ctrlr_channel *ctrlr_ch;
271 :
272 1 : assert(ctrlr_io_ch != NULL);
273 :
274 1 : ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
275 :
276 1 : return ctrlr_ch->qpair->qpair;
277 : }
278 :
279 : static int
280 0 : bdev_nvme_get_ctx_size(void)
281 : {
282 0 : return sizeof(struct nvme_bdev_io);
283 : }
284 :
285 : static struct spdk_bdev_module nvme_if = { /* bdev module descriptor for the "nvme" module */
286 : .name = "nvme",
287 : .async_fini = true, /* fini completion is reported later via spdk_bdev_module_fini_done() (see _nvme_ctrlr_delete) */
288 : .module_init = bdev_nvme_library_init,
289 : .module_fini = bdev_nvme_library_fini,
290 : .config_json = bdev_nvme_config_json,
291 : .get_ctx_size = bdev_nvme_get_ctx_size, /* per-I/O context is struct nvme_bdev_io */
292 :
293 : };
294 1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
295 :
296 : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs); /* list of all nvme_bdev_ctrlr objects */
297 : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER; /* held around traversal/modification of g_nvme_bdev_ctrlrs and the per-nbdev_ctrlr lists in this file */
298 : bool g_bdev_nvme_module_finish; /* checked in _nvme_ctrlr_delete(): once set and the list is empty, module fini is completed */
299 :
300 : struct nvme_bdev_ctrlr *
301 327 : nvme_bdev_ctrlr_get_by_name(const char *name)
302 : {
303 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
304 :
305 327 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
306 169 : if (strcmp(name, nbdev_ctrlr->name) == 0) {
307 169 : break;
308 : }
309 : }
310 :
311 327 : return nbdev_ctrlr;
312 : }
313 :
314 : static struct nvme_ctrlr *
315 58 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
316 : const struct spdk_nvme_transport_id *trid, const char *hostnqn)
317 : {
318 : const struct spdk_nvme_ctrlr_opts *opts;
319 : struct nvme_ctrlr *nvme_ctrlr;
320 :
321 99 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
322 74 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
323 74 : if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0 &&
324 33 : strcmp(hostnqn, opts->hostnqn) == 0) {
325 33 : break;
326 : }
327 : }
328 :
329 58 : return nvme_ctrlr;
330 : }
331 :
332 : struct nvme_ctrlr *
333 0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
334 : uint16_t cntlid)
335 : {
336 : struct nvme_ctrlr *nvme_ctrlr;
337 : const struct spdk_nvme_ctrlr_data *cdata;
338 :
339 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
340 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
341 0 : if (cdata->cntlid == cntlid) {
342 0 : break;
343 : }
344 : }
345 :
346 0 : return nvme_ctrlr;
347 : }
348 :
349 : static struct nvme_bdev *
350 73 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
351 : {
352 : struct nvme_bdev *bdev;
353 :
354 73 : pthread_mutex_lock(&g_bdev_nvme_mutex);
355 107 : TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
356 68 : if (bdev->nsid == nsid) {
357 34 : break;
358 : }
359 : }
360 73 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
361 :
362 73 : return bdev;
363 : }
364 :
365 : struct nvme_ns *
366 143 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
367 : {
368 143 : struct nvme_ns ns;
369 :
370 143 : assert(nsid > 0);
371 :
372 143 : ns.id = nsid;
373 143 : return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
374 : }
375 :
376 : struct nvme_ns *
377 162 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
378 : {
379 162 : return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
380 : }
381 :
382 : struct nvme_ns *
383 72 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
384 : {
385 72 : if (ns == NULL) {
386 0 : return NULL;
387 : }
388 :
389 72 : return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
390 : }
391 :
392 : static struct nvme_ctrlr *
393 52 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
394 : {
395 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
396 52 : struct nvme_ctrlr *nvme_ctrlr = NULL;
397 :
398 52 : pthread_mutex_lock(&g_bdev_nvme_mutex);
399 71 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
400 19 : nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid, hostnqn);
401 19 : if (nvme_ctrlr != NULL) {
402 0 : break;
403 : }
404 : }
405 52 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
406 :
407 52 : return nvme_ctrlr;
408 : }
409 :
410 : struct nvme_ctrlr *
411 125 : nvme_ctrlr_get_by_name(const char *name)
412 : {
413 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
414 125 : struct nvme_ctrlr *nvme_ctrlr = NULL;
415 :
416 125 : if (name == NULL) {
417 0 : return NULL;
418 : }
419 :
420 125 : pthread_mutex_lock(&g_bdev_nvme_mutex);
421 125 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
422 125 : if (nbdev_ctrlr != NULL) {
423 60 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
424 : }
425 125 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
426 :
427 125 : return nvme_ctrlr;
428 : }
429 :
430 : void
431 0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
432 : {
433 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
434 :
435 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
436 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
437 0 : fn(nbdev_ctrlr, ctx);
438 : }
439 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
440 0 : }
441 :
442 : struct nvme_ctrlr_channel_iter {
443 : nvme_ctrlr_for_each_channel_msg fn; /* called once per controller channel */
444 : nvme_ctrlr_for_each_channel_done cpl; /* called once when the iteration finishes */
445 : struct spdk_io_channel_iter *i; /* underlying SPDK iterator, refreshed before each callback */
446 : void *ctx; /* caller context passed through to fn and cpl */
447 : };
448 :
449 : void
450 284 : nvme_ctrlr_for_each_channel_continue(struct nvme_ctrlr_channel_iter *iter, int status)
451 : {
452 284 : spdk_for_each_channel_continue(iter->i, status);
453 284 : }
454 :
455 : static void
456 284 : nvme_ctrlr_each_channel_msg(struct spdk_io_channel_iter *i)
457 : {
458 284 : struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
459 284 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
460 284 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
461 284 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
462 :
463 284 : iter->i = i;
464 284 : iter->fn(iter, nvme_ctrlr, ctrlr_ch, iter->ctx);
465 284 : }
466 :
467 : static void
468 165 : nvme_ctrlr_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
469 : {
470 165 : struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
471 165 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
472 :
473 165 : iter->i = i;
474 165 : iter->cpl(nvme_ctrlr, iter->ctx, status);
475 :
476 165 : free(iter);
477 165 : }
478 :
479 : void
480 165 : nvme_ctrlr_for_each_channel(struct nvme_ctrlr *nvme_ctrlr,
481 : nvme_ctrlr_for_each_channel_msg fn, void *ctx,
482 : nvme_ctrlr_for_each_channel_done cpl)
483 : {
484 : struct nvme_ctrlr_channel_iter *iter;
485 :
486 165 : assert(nvme_ctrlr != NULL && fn != NULL);
487 :
488 165 : iter = calloc(1, sizeof(struct nvme_ctrlr_channel_iter));
489 165 : if (iter == NULL) {
490 0 : SPDK_ERRLOG("Unable to allocate iterator\n");
491 0 : assert(false);
492 : return;
493 : }
494 :
495 165 : iter->fn = fn;
496 165 : iter->cpl = cpl;
497 165 : iter->ctx = ctx;
498 :
499 165 : spdk_for_each_channel(nvme_ctrlr, nvme_ctrlr_each_channel_msg,
500 : iter, nvme_ctrlr_each_channel_cpl);
501 : }
502 :
503 : struct nvme_bdev_channel_iter {
504 : nvme_bdev_for_each_channel_msg fn; /* called once per bdev channel */
505 : nvme_bdev_for_each_channel_done cpl; /* called once when the iteration finishes */
506 : struct spdk_io_channel_iter *i; /* underlying SPDK iterator, refreshed before each callback */
507 : void *ctx; /* caller context passed through to fn and cpl */
508 : };
509 :
510 : void
511 67 : nvme_bdev_for_each_channel_continue(struct nvme_bdev_channel_iter *iter, int status)
512 : {
513 67 : spdk_for_each_channel_continue(iter->i, status);
514 67 : }
515 :
516 : static void
517 67 : nvme_bdev_each_channel_msg(struct spdk_io_channel_iter *i)
518 : {
519 67 : struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
520 67 : struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
521 67 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
522 67 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
523 :
524 67 : iter->i = i;
525 67 : iter->fn(iter, nbdev, nbdev_ch, iter->ctx);
526 67 : }
527 :
528 : static void
529 59 : nvme_bdev_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
530 : {
531 59 : struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
532 59 : struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
533 :
534 59 : iter->i = i;
535 59 : iter->cpl(nbdev, iter->ctx, status);
536 :
537 59 : free(iter);
538 59 : }
539 :
540 : void
541 59 : nvme_bdev_for_each_channel(struct nvme_bdev *nbdev,
542 : nvme_bdev_for_each_channel_msg fn, void *ctx,
543 : nvme_bdev_for_each_channel_done cpl)
544 : {
545 : struct nvme_bdev_channel_iter *iter;
546 :
547 59 : assert(nbdev != NULL && fn != NULL);
548 :
549 59 : iter = calloc(1, sizeof(struct nvme_bdev_channel_iter));
550 59 : if (iter == NULL) {
551 0 : SPDK_ERRLOG("Unable to allocate iterator\n");
552 0 : assert(false);
553 : return;
554 : }
555 :
556 59 : iter->fn = fn;
557 59 : iter->cpl = cpl;
558 59 : iter->ctx = ctx;
559 :
560 59 : spdk_for_each_channel(nbdev, nvme_bdev_each_channel_msg, iter,
561 : nvme_bdev_each_channel_cpl);
562 : }
563 :
564 : void
565 0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
566 : {
567 : const char *trtype_str;
568 : const char *adrfam_str;
569 :
570 0 : trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
571 0 : if (trtype_str) {
572 0 : spdk_json_write_named_string(w, "trtype", trtype_str);
573 : }
574 :
575 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
576 0 : if (adrfam_str) {
577 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
578 : }
579 :
580 0 : if (trid->traddr[0] != '\0') {
581 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
582 : }
583 :
584 0 : if (trid->trsvcid[0] != '\0') {
585 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
586 : }
587 :
588 0 : if (trid->subnqn[0] != '\0') {
589 0 : spdk_json_write_named_string(w, "subnqn", trid->subnqn);
590 : }
591 0 : }
592 :
593 : static void
594 60 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
595 : struct nvme_ctrlr *nvme_ctrlr)
596 : {
597 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
598 60 : pthread_mutex_lock(&g_bdev_nvme_mutex);
599 :
600 60 : TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
601 60 : if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
602 15 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
603 :
604 15 : return;
605 : }
606 45 : TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
607 :
608 45 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
609 :
610 45 : assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));
611 :
612 45 : free(nbdev_ctrlr->name);
613 45 : free(nbdev_ctrlr);
614 : }
615 :
616 : static void
617 61 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
618 : {
619 : struct nvme_path_id *path_id, *tmp_path;
620 : struct nvme_ns *ns, *tmp_ns;
621 :
622 61 : free(nvme_ctrlr->copied_ana_desc);
623 61 : spdk_free(nvme_ctrlr->ana_log_page);
624 :
625 61 : if (nvme_ctrlr->opal_dev) {
626 0 : spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
627 0 : nvme_ctrlr->opal_dev = NULL;
628 : }
629 :
630 61 : if (nvme_ctrlr->nbdev_ctrlr) {
631 60 : nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
632 : }
633 :
634 61 : RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
635 0 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
636 0 : nvme_ns_free(ns);
637 : }
638 :
639 122 : TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
640 61 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
641 61 : free(path_id);
642 : }
643 :
644 61 : pthread_mutex_destroy(&nvme_ctrlr->mutex);
645 61 : spdk_keyring_put_key(nvme_ctrlr->psk);
646 61 : spdk_keyring_put_key(nvme_ctrlr->dhchap_key);
647 61 : spdk_keyring_put_key(nvme_ctrlr->dhchap_ctrlr_key);
648 61 : free(nvme_ctrlr);
649 :
650 61 : pthread_mutex_lock(&g_bdev_nvme_mutex);
651 61 : if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
652 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
653 0 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
654 0 : spdk_bdev_module_fini_done();
655 0 : return;
656 : }
657 61 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
658 : }
659 :
660 : static int
661 61 : nvme_detach_poller(void *arg)
662 : {
663 61 : struct nvme_ctrlr *nvme_ctrlr = arg;
664 : int rc;
665 :
666 61 : rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
667 61 : if (rc != -EAGAIN) {
668 61 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
669 61 : _nvme_ctrlr_delete(nvme_ctrlr);
670 : }
671 :
672 61 : return SPDK_POLLER_BUSY;
673 : }
674 :
675 : static void
676 61 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
677 : {
678 : int rc;
679 :
680 61 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
681 :
682 : /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
683 61 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
684 :
685 : /* If we got here, the reset/detach poller cannot be active */
686 61 : assert(nvme_ctrlr->reset_detach_poller == NULL);
687 61 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
688 : nvme_ctrlr, 1000);
689 61 : if (nvme_ctrlr->reset_detach_poller == NULL) {
690 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to register detach poller\n");
691 0 : goto error;
692 : }
693 :
694 61 : rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
695 61 : if (rc != 0) {
696 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to detach the NVMe controller\n");
697 0 : goto error;
698 : }
699 :
700 61 : return;
701 0 : error:
702 : /* We don't have a good way to handle errors here, so just do what we can and delete the
703 : * controller without detaching the underlying NVMe device.
704 : */
705 0 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
706 0 : _nvme_ctrlr_delete(nvme_ctrlr);
707 : }
708 :
/* io_device unregister callback: the io_device is the nvme_ctrlr itself, which
 * is now handed to nvme_ctrlr_delete().
 */
static void
nvme_ctrlr_unregister_cb(void *io_device)
{
	nvme_ctrlr_delete(io_device);
}
716 :
717 : static void
718 60 : nvme_ctrlr_unregister(void *ctx)
719 : {
720 60 : struct nvme_ctrlr *nvme_ctrlr = ctx;
721 :
722 60 : spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
723 60 : }
724 :
725 : static bool
726 244 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
727 : {
728 244 : if (!nvme_ctrlr->destruct) {
729 128 : return false;
730 : }
731 :
732 116 : if (nvme_ctrlr->ref > 0) {
733 56 : return false;
734 : }
735 :
736 60 : if (nvme_ctrlr->resetting) {
737 0 : return false;
738 : }
739 :
740 60 : if (nvme_ctrlr->ana_log_page_updating) {
741 0 : return false;
742 : }
743 :
744 60 : if (nvme_ctrlr->io_path_cache_clearing) {
745 0 : return false;
746 : }
747 :
748 60 : return true;
749 : }
750 :
751 : static void
752 168 : nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
753 : {
754 168 : pthread_mutex_lock(&nvme_ctrlr->mutex);
755 : SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
756 :
757 168 : assert(nvme_ctrlr->ref > 0);
758 168 : nvme_ctrlr->ref--;
759 :
760 168 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
761 108 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
762 108 : return;
763 : }
764 :
765 60 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
766 :
767 60 : spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
768 : }
769 :
770 : static void
771 251 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
772 : {
773 251 : nbdev_ch->current_io_path = NULL;
774 251 : nbdev_ch->rr_counter = 0;
775 251 : }
776 :
777 : static struct nvme_io_path *
778 8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
779 : {
780 : struct nvme_io_path *io_path;
781 :
782 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
783 15 : if (io_path->nvme_ns == nvme_ns) {
784 7 : break;
785 : }
786 : }
787 :
788 8 : return io_path;
789 : }
790 :
791 : static struct nvme_io_path *
792 37 : nvme_io_path_alloc(void)
793 : {
794 : struct nvme_io_path *io_path;
795 :
796 37 : io_path = calloc(1, sizeof(*io_path));
797 37 : if (io_path == NULL) {
798 0 : SPDK_ERRLOG("Failed to alloc io_path.\n");
799 0 : return NULL;
800 : }
801 :
802 37 : if (g_opts.io_path_stat) {
803 0 : io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
804 0 : if (io_path->stat == NULL) {
805 0 : free(io_path);
806 0 : SPDK_ERRLOG("Failed to alloc io_path stat.\n");
807 0 : return NULL;
808 : }
809 0 : spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
810 : }
811 :
812 37 : return io_path;
813 : }
814 :
815 : static void
816 37 : nvme_io_path_free(struct nvme_io_path *io_path)
817 : {
818 37 : free(io_path->stat);
819 37 : free(io_path);
820 37 : }
821 :
822 : static int
823 37 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
824 : {
825 : struct nvme_io_path *io_path;
826 : struct spdk_io_channel *ch;
827 : struct nvme_ctrlr_channel *ctrlr_ch;
828 : struct nvme_qpair *nvme_qpair;
829 :
830 37 : io_path = nvme_io_path_alloc();
831 37 : if (io_path == NULL) {
832 0 : return -ENOMEM;
833 : }
834 :
835 37 : io_path->nvme_ns = nvme_ns;
836 :
837 37 : ch = spdk_get_io_channel(nvme_ns->ctrlr);
838 37 : if (ch == NULL) {
839 0 : nvme_io_path_free(io_path);
840 0 : SPDK_ERRLOG("Failed to alloc io_channel.\n");
841 0 : return -ENOMEM;
842 : }
843 :
844 37 : ctrlr_ch = spdk_io_channel_get_ctx(ch);
845 :
846 37 : nvme_qpair = ctrlr_ch->qpair;
847 37 : assert(nvme_qpair != NULL);
848 :
849 37 : io_path->qpair = nvme_qpair;
850 37 : TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);
851 :
852 37 : io_path->nbdev_ch = nbdev_ch;
853 37 : STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);
854 :
855 37 : bdev_nvme_clear_current_io_path(nbdev_ch);
856 :
857 37 : return 0;
858 : }
859 :
860 : static void
861 37 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
862 : struct nvme_io_path *io_path)
863 : {
864 : struct nvme_bdev_io *bio;
865 :
866 38 : TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
867 1 : if (bio->io_path == io_path) {
868 1 : bio->io_path = NULL;
869 : }
870 : }
871 37 : }
872 :
873 : static void
874 37 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
875 : {
876 : struct spdk_io_channel *ch;
877 : struct nvme_qpair *nvme_qpair;
878 : struct nvme_ctrlr_channel *ctrlr_ch;
879 : struct nvme_bdev *nbdev;
880 :
881 37 : nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));
882 :
883 : /* Add the statistics to nvme_ns before this path is destroyed. */
884 37 : pthread_mutex_lock(&nbdev->mutex);
885 37 : if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
886 0 : spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
887 : }
888 37 : pthread_mutex_unlock(&nbdev->mutex);
889 :
890 37 : bdev_nvme_clear_current_io_path(nbdev_ch);
891 37 : bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);
892 :
893 37 : STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
894 37 : io_path->nbdev_ch = NULL;
895 :
896 37 : nvme_qpair = io_path->qpair;
897 37 : assert(nvme_qpair != NULL);
898 :
899 37 : ctrlr_ch = nvme_qpair->ctrlr_ch;
900 37 : assert(ctrlr_ch != NULL);
901 :
902 37 : ch = spdk_io_channel_from_ctx(ctrlr_ch);
903 37 : spdk_put_io_channel(ch);
904 :
905 : /* After an io_path is removed, I/Os submitted to it may complete and update statistics
906 : * of the io_path. To avoid heap-use-after-free error from this case, do not free the
907 : * io_path here but free the io_path when the associated qpair is freed. It is ensured
908 : * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
909 : */
910 37 : }
911 :
912 : static void
913 24 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
914 : {
915 : struct nvme_io_path *io_path, *tmp_io_path;
916 :
917 59 : STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
918 35 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
919 : }
920 24 : }
921 :
922 : static int
923 24 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
924 : {
925 24 : struct nvme_bdev_channel *nbdev_ch = ctx_buf;
926 24 : struct nvme_bdev *nbdev = io_device;
927 : struct nvme_ns *nvme_ns;
928 : int rc;
929 :
930 24 : STAILQ_INIT(&nbdev_ch->io_path_list);
931 24 : TAILQ_INIT(&nbdev_ch->retry_io_list);
932 :
933 24 : pthread_mutex_lock(&nbdev->mutex);
934 :
935 24 : nbdev_ch->mp_policy = nbdev->mp_policy;
936 24 : nbdev_ch->mp_selector = nbdev->mp_selector;
937 24 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
938 :
939 59 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
940 35 : rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
941 35 : if (rc != 0) {
942 0 : pthread_mutex_unlock(&nbdev->mutex);
943 :
944 0 : _bdev_nvme_delete_io_paths(nbdev_ch);
945 0 : return rc;
946 : }
947 : }
948 24 : pthread_mutex_unlock(&nbdev->mutex);
949 :
950 24 : return 0;
951 : }
952 :
953 : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
954 : * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
955 : */
956 : static inline void
957 57 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
958 : const struct spdk_nvme_cpl *cpl)
959 : {
960 57 : spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
961 : (uintptr_t)bdev_io);
962 57 : if (cpl) {
963 29 : spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
964 : } else {
965 28 : spdk_bdev_io_complete(bdev_io, status);
966 : }
967 57 : }
968 :
969 : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
970 :
/* I/O channel destroy callback for an nvme_bdev.
 *
 * Queued retry I/Os are aborted first, then the io_paths of the channel are deleted.
 */
static void
bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
{
	struct nvme_bdev_channel *channel = ctx_buf;

	bdev_nvme_abort_retry_ios(channel);
	_bdev_nvme_delete_io_paths(channel);
}
979 :
980 : static inline bool
981 62 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
982 : {
983 62 : switch (io_type) {
984 5 : case SPDK_BDEV_IO_TYPE_RESET:
985 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
986 : case SPDK_BDEV_IO_TYPE_ABORT:
987 5 : return true;
988 57 : default:
989 57 : break;
990 : }
991 :
992 57 : return false;
993 : }
994 :
995 : static inline bool
996 98 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
997 : {
998 98 : if (spdk_unlikely(nvme_ns->ana_state_updating)) {
999 1 : return false;
1000 : }
1001 :
1002 97 : if (spdk_unlikely(nvme_ns->ns == NULL)) {
1003 0 : return false;
1004 : }
1005 :
1006 97 : return true;
1007 : }
1008 :
1009 : static inline bool
1010 86 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
1011 : {
1012 86 : if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
1013 1 : return false;
1014 : }
1015 :
1016 85 : switch (nvme_ns->ana_state) {
1017 76 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1018 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1019 76 : return true;
1020 9 : default:
1021 9 : break;
1022 : }
1023 :
1024 9 : return false;
1025 : }
1026 :
1027 : static inline bool
1028 128 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
1029 : {
1030 128 : if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
1031 23 : return false;
1032 : }
1033 :
1034 105 : if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
1035 : SPDK_NVME_QPAIR_FAILURE_NONE)) {
1036 2 : return false;
1037 : }
1038 :
1039 103 : if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
1040 0 : return false;
1041 : }
1042 :
1043 103 : return true;
1044 : }
1045 :
1046 : static inline bool
1047 102 : nvme_io_path_is_available(struct nvme_io_path *io_path)
1048 : {
1049 102 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
1050 16 : return false;
1051 : }
1052 :
1053 86 : if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
1054 10 : return false;
1055 : }
1056 :
1057 76 : return true;
1058 : }
1059 :
1060 : static inline bool
1061 9 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
1062 : {
1063 9 : if (nvme_ctrlr->destruct) {
1064 0 : return true;
1065 : }
1066 :
1067 9 : if (nvme_ctrlr->fast_io_fail_timedout) {
1068 2 : return true;
1069 : }
1070 :
1071 7 : if (nvme_ctrlr->resetting) {
1072 5 : if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
1073 5 : return false;
1074 : } else {
1075 0 : return true;
1076 : }
1077 : }
1078 :
1079 2 : if (nvme_ctrlr->reconnect_is_delayed) {
1080 2 : return false;
1081 : }
1082 :
1083 0 : if (nvme_ctrlr->disabled) {
1084 0 : return true;
1085 : }
1086 :
1087 0 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
1088 0 : return true;
1089 : } else {
1090 0 : return false;
1091 : }
1092 : }
1093 :
1094 : static bool
1095 20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
1096 : {
1097 20 : if (nvme_ctrlr->destruct) {
1098 0 : return false;
1099 : }
1100 :
1101 20 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
1102 3 : return false;
1103 : }
1104 :
1105 17 : if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
1106 1 : return false;
1107 : }
1108 :
1109 16 : if (nvme_ctrlr->disabled) {
1110 0 : return false;
1111 : }
1112 :
1113 16 : return true;
1114 : }
1115 :
1116 : /* Simulate circular linked list. */
1117 : static inline struct nvme_io_path *
1118 99 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
1119 : {
1120 : struct nvme_io_path *next_path;
1121 :
1122 99 : if (prev_path != NULL) {
1123 39 : next_path = STAILQ_NEXT(prev_path, stailq);
1124 39 : if (next_path != NULL) {
1125 14 : return next_path;
1126 : }
1127 : }
1128 :
1129 85 : return STAILQ_FIRST(&nbdev_ch->io_path_list);
1130 : }
1131 :
1132 : static struct nvme_io_path *
1133 67 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
1134 : {
1135 67 : struct nvme_io_path *io_path, *start, *non_optimized = NULL;
1136 :
1137 67 : start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
1138 :
1139 67 : io_path = start;
1140 : do {
1141 79 : if (spdk_likely(nvme_io_path_is_available(io_path))) {
1142 57 : switch (io_path->nvme_ns->ana_state) {
1143 47 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1144 47 : nbdev_ch->current_io_path = io_path;
1145 47 : return io_path;
1146 10 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1147 10 : if (non_optimized == NULL) {
1148 7 : non_optimized = io_path;
1149 : }
1150 10 : break;
1151 0 : default:
1152 0 : assert(false);
1153 : break;
1154 : }
1155 : }
1156 32 : io_path = nvme_io_path_get_next(nbdev_ch, io_path);
1157 32 : } while (io_path != start);
1158 :
1159 20 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
1160 : /* We come here only if there is no optimized path. Cache even non_optimized
1161 : * path for load balance across multiple non_optimized paths.
1162 : */
1163 1 : nbdev_ch->current_io_path = non_optimized;
1164 : }
1165 :
1166 20 : return non_optimized;
1167 : }
1168 :
1169 : static struct nvme_io_path *
1170 4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
1171 : {
1172 : struct nvme_io_path *io_path;
1173 4 : struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
1174 4 : uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
1175 : uint32_t num_outstanding_reqs;
1176 :
1177 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1178 12 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
1179 : /* The device is currently resetting. */
1180 0 : continue;
1181 : }
1182 :
1183 12 : if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
1184 0 : continue;
1185 : }
1186 :
1187 12 : num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
1188 12 : switch (io_path->nvme_ns->ana_state) {
1189 6 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1190 6 : if (num_outstanding_reqs < opt_min_qd) {
1191 5 : opt_min_qd = num_outstanding_reqs;
1192 5 : optimized = io_path;
1193 : }
1194 6 : break;
1195 3 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1196 3 : if (num_outstanding_reqs < non_opt_min_qd) {
1197 3 : non_opt_min_qd = num_outstanding_reqs;
1198 3 : non_optimized = io_path;
1199 : }
1200 3 : break;
1201 3 : default:
1202 3 : break;
1203 : }
1204 : }
1205 :
1206 : /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
1207 4 : if (optimized != NULL) {
1208 3 : return optimized;
1209 : }
1210 :
1211 1 : return non_optimized;
1212 : }
1213 :
1214 : static inline struct nvme_io_path *
1215 105 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
1216 : {
1217 105 : if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
1218 41 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
1219 31 : return nbdev_ch->current_io_path;
1220 10 : } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1221 10 : if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
1222 3 : return nbdev_ch->current_io_path;
1223 : }
1224 7 : nbdev_ch->rr_counter = 0;
1225 : }
1226 : }
1227 :
1228 71 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
1229 14 : nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1230 67 : return _bdev_nvme_find_io_path(nbdev_ch);
1231 : } else {
1232 4 : return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
1233 : }
1234 : }
1235 :
1236 : /* Return true if there is any io_path whose qpair is active or ctrlr is not failed,
1237 : * or false otherwise.
1238 : *
1239 : * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
1240 : * is likely to be non-accessible now but may become accessible.
1241 : *
1242 : * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
1243 : * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
1244 : * when starting to reset it but it is set to failed when the reset failed. Hence, if
1245 : * a ctrlr is unfailed, it is likely that it works fine or is resetting.
1246 : */
1247 : static bool
1248 15 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
1249 : {
1250 : struct nvme_io_path *io_path;
1251 :
1252 15 : if (nbdev_ch->resetting) {
1253 1 : return false;
1254 : }
1255 :
1256 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1257 14 : if (io_path->nvme_ns->ana_transition_timedout) {
1258 0 : continue;
1259 : }
1260 :
1261 14 : if (nvme_qpair_is_connected(io_path->qpair) ||
1262 9 : !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
1263 12 : return true;
1264 : }
1265 : }
1266 :
1267 2 : return false;
1268 : }
1269 :
1270 : static void
1271 14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
1272 : {
1273 14 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1274 : struct spdk_io_channel *ch;
1275 :
1276 14 : if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
1277 3 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
1278 : } else {
1279 11 : ch = spdk_io_channel_from_ctx(nbdev_ch);
1280 11 : bdev_nvme_submit_request(ch, bdev_io);
1281 : }
1282 14 : }
1283 :
1284 : static int
1285 14 : bdev_nvme_retry_ios(void *arg)
1286 : {
1287 14 : struct nvme_bdev_channel *nbdev_ch = arg;
1288 : struct nvme_bdev_io *bio, *tmp_bio;
1289 : uint64_t now, delay_us;
1290 :
1291 14 : now = spdk_get_ticks();
1292 :
1293 28 : TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
1294 15 : if (bio->retry_ticks > now) {
1295 1 : break;
1296 : }
1297 :
1298 14 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
1299 :
1300 14 : bdev_nvme_retry_io(nbdev_ch, spdk_bdev_io_from_ctx(bio));
1301 : }
1302 :
1303 14 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1304 :
1305 14 : bio = TAILQ_FIRST(&nbdev_ch->retry_io_list);
1306 14 : if (bio != NULL) {
1307 4 : delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
1308 :
1309 4 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1310 : delay_us);
1311 : }
1312 :
1313 14 : return SPDK_POLLER_BUSY;
1314 : }
1315 :
1316 : static void
1317 16 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
1318 : struct nvme_bdev_io *bio, uint64_t delay_ms)
1319 : {
1320 : struct nvme_bdev_io *tmp_bio;
1321 :
1322 16 : bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
1323 :
1324 16 : TAILQ_FOREACH_REVERSE(tmp_bio, &nbdev_ch->retry_io_list, retry_io_head, retry_link) {
1325 1 : if (tmp_bio->retry_ticks <= bio->retry_ticks) {
1326 1 : TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bio, bio,
1327 : retry_link);
1328 1 : return;
1329 : }
1330 : }
1331 :
1332 : /* No earlier I/Os were found. This I/O must be the new head. */
1333 15 : TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bio, retry_link);
1334 :
1335 15 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1336 :
1337 15 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1338 : delay_ms * 1000ULL);
1339 : }
1340 :
1341 : static void
1342 54 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
1343 : {
1344 : struct nvme_bdev_io *bio, *tmp_bio;
1345 :
1346 55 : TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
1347 1 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
1348 1 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1349 : }
1350 :
1351 54 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1352 54 : }
1353 :
1354 : static int
1355 6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
1356 : struct nvme_bdev_io *bio_to_abort)
1357 : {
1358 : struct nvme_bdev_io *bio;
1359 :
1360 6 : TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
1361 1 : if (bio == bio_to_abort) {
1362 1 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
1363 1 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1364 1 : return 0;
1365 : }
1366 : }
1367 :
1368 5 : return -ENOENT;
1369 : }
1370 :
1371 : static void
1372 12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
1373 : {
1374 : struct nvme_bdev *nbdev;
1375 : uint16_t sct, sc;
1376 :
1377 12 : assert(spdk_nvme_cpl_is_error(cpl));
1378 :
1379 12 : nbdev = bdev_io->bdev->ctxt;
1380 :
1381 12 : if (nbdev->err_stat == NULL) {
1382 12 : return;
1383 : }
1384 :
1385 0 : sct = cpl->status.sct;
1386 0 : sc = cpl->status.sc;
1387 :
1388 0 : pthread_mutex_lock(&nbdev->mutex);
1389 :
1390 0 : nbdev->err_stat->status_type[sct]++;
1391 0 : switch (sct) {
1392 0 : case SPDK_NVME_SCT_GENERIC:
1393 : case SPDK_NVME_SCT_COMMAND_SPECIFIC:
1394 : case SPDK_NVME_SCT_MEDIA_ERROR:
1395 : case SPDK_NVME_SCT_PATH:
1396 0 : nbdev->err_stat->status[sct][sc]++;
1397 0 : break;
1398 0 : default:
1399 0 : break;
1400 : }
1401 :
1402 0 : pthread_mutex_unlock(&nbdev->mutex);
1403 : }
1404 :
1405 : static inline void
1406 20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
1407 : {
1408 20 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1409 20 : uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
1410 20 : uint32_t blocklen = bdev_io->bdev->blocklen;
1411 : struct spdk_bdev_io_stat *stat;
1412 : uint64_t tsc_diff;
1413 :
1414 20 : if (bio->io_path->stat == NULL) {
1415 20 : return;
1416 : }
1417 :
1418 0 : tsc_diff = spdk_get_ticks() - bio->submit_tsc;
1419 0 : stat = bio->io_path->stat;
1420 :
1421 0 : switch (bdev_io->type) {
1422 0 : case SPDK_BDEV_IO_TYPE_READ:
1423 0 : stat->bytes_read += num_blocks * blocklen;
1424 0 : stat->num_read_ops++;
1425 0 : stat->read_latency_ticks += tsc_diff;
1426 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1427 0 : stat->max_read_latency_ticks = tsc_diff;
1428 : }
1429 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1430 0 : stat->min_read_latency_ticks = tsc_diff;
1431 : }
1432 0 : break;
1433 0 : case SPDK_BDEV_IO_TYPE_WRITE:
1434 0 : stat->bytes_written += num_blocks * blocklen;
1435 0 : stat->num_write_ops++;
1436 0 : stat->write_latency_ticks += tsc_diff;
1437 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1438 0 : stat->max_write_latency_ticks = tsc_diff;
1439 : }
1440 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1441 0 : stat->min_write_latency_ticks = tsc_diff;
1442 : }
1443 0 : break;
1444 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
1445 0 : stat->bytes_unmapped += num_blocks * blocklen;
1446 0 : stat->num_unmap_ops++;
1447 0 : stat->unmap_latency_ticks += tsc_diff;
1448 0 : if (stat->max_unmap_latency_ticks < tsc_diff) {
1449 0 : stat->max_unmap_latency_ticks = tsc_diff;
1450 : }
1451 0 : if (stat->min_unmap_latency_ticks > tsc_diff) {
1452 0 : stat->min_unmap_latency_ticks = tsc_diff;
1453 : }
1454 0 : break;
1455 0 : case SPDK_BDEV_IO_TYPE_ZCOPY:
1456 : /* Track the data in the start phase only */
1457 0 : if (!bdev_io->u.bdev.zcopy.start) {
1458 0 : break;
1459 : }
1460 0 : if (bdev_io->u.bdev.zcopy.populate) {
1461 0 : stat->bytes_read += num_blocks * blocklen;
1462 0 : stat->num_read_ops++;
1463 0 : stat->read_latency_ticks += tsc_diff;
1464 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1465 0 : stat->max_read_latency_ticks = tsc_diff;
1466 : }
1467 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1468 0 : stat->min_read_latency_ticks = tsc_diff;
1469 : }
1470 : } else {
1471 0 : stat->bytes_written += num_blocks * blocklen;
1472 0 : stat->num_write_ops++;
1473 0 : stat->write_latency_ticks += tsc_diff;
1474 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1475 0 : stat->max_write_latency_ticks = tsc_diff;
1476 : }
1477 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1478 0 : stat->min_write_latency_ticks = tsc_diff;
1479 : }
1480 : }
1481 0 : break;
1482 0 : case SPDK_BDEV_IO_TYPE_COPY:
1483 0 : stat->bytes_copied += num_blocks * blocklen;
1484 0 : stat->num_copy_ops++;
1485 0 : stat->copy_latency_ticks += tsc_diff;
1486 0 : if (stat->max_copy_latency_ticks < tsc_diff) {
1487 0 : stat->max_copy_latency_ticks = tsc_diff;
1488 : }
1489 0 : if (stat->min_copy_latency_ticks > tsc_diff) {
1490 0 : stat->min_copy_latency_ticks = tsc_diff;
1491 : }
1492 0 : break;
1493 0 : default:
1494 0 : break;
1495 : }
1496 : }
1497 :
1498 : static bool
1499 7 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
1500 : const struct spdk_nvme_cpl *cpl,
1501 : struct nvme_bdev_channel *nbdev_ch,
1502 : uint64_t *_delay_ms)
1503 : {
1504 7 : struct nvme_io_path *io_path = bio->io_path;
1505 7 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
1506 : const struct spdk_nvme_ctrlr_data *cdata;
1507 :
1508 7 : if (spdk_nvme_cpl_is_path_error(cpl) ||
1509 5 : spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
1510 4 : !nvme_io_path_is_available(io_path) ||
1511 4 : !nvme_ctrlr_is_available(nvme_ctrlr)) {
1512 3 : bdev_nvme_clear_current_io_path(nbdev_ch);
1513 3 : bio->io_path = NULL;
1514 3 : if (spdk_nvme_cpl_is_ana_error(cpl)) {
1515 1 : if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
1516 1 : io_path->nvme_ns->ana_state_updating = true;
1517 : }
1518 : }
1519 3 : if (!any_io_path_may_become_available(nbdev_ch)) {
1520 0 : return false;
1521 : }
1522 3 : *_delay_ms = 0;
1523 : } else {
1524 4 : bio->retry_count++;
1525 :
1526 4 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
1527 :
1528 4 : if (cpl->status.crd != 0) {
1529 1 : *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
1530 : } else {
1531 3 : *_delay_ms = 0;
1532 : }
1533 : }
1534 :
1535 7 : return true;
1536 : }
1537 :
1538 : static inline void
1539 32 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
1540 : const struct spdk_nvme_cpl *cpl)
1541 : {
1542 32 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1543 : struct nvme_bdev_channel *nbdev_ch;
1544 32 : uint64_t delay_ms;
1545 :
1546 32 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1547 :
1548 32 : if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
1549 20 : bdev_nvme_update_io_path_stat(bio);
1550 20 : goto complete;
1551 : }
1552 :
1553 : /* Update error counts before deciding if retry is needed.
1554 : * Hence, error counts may be more than the number of I/O errors.
1555 : */
1556 12 : bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
1557 :
1558 12 : if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
1559 8 : (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
1560 5 : goto complete;
1561 : }
1562 :
1563 : /* At this point we don't know whether the sequence was successfully executed or not, so we
1564 : * cannot retry the IO */
1565 7 : if (bdev_io->u.bdev.accel_sequence != NULL) {
1566 0 : goto complete;
1567 : }
1568 :
1569 7 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1570 :
1571 7 : if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
1572 7 : bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
1573 7 : return;
1574 : }
1575 :
1576 25 : complete:
1577 25 : bio->retry_count = 0;
1578 25 : bio->submit_tsc = 0;
1579 25 : bdev_io->u.bdev.accel_sequence = NULL;
1580 25 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
1581 : }
1582 :
1583 : static inline void
1584 13 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
1585 : {
1586 13 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1587 : struct nvme_bdev_channel *nbdev_ch;
1588 : enum spdk_bdev_io_status io_status;
1589 :
1590 13 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1591 :
1592 13 : switch (rc) {
1593 1 : case 0:
1594 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1595 1 : break;
1596 0 : case -ENOMEM:
1597 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1598 0 : break;
1599 12 : case -ENXIO:
1600 12 : if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
1601 12 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1602 :
1603 12 : bdev_nvme_clear_current_io_path(nbdev_ch);
1604 12 : bio->io_path = NULL;
1605 :
1606 12 : if (any_io_path_may_become_available(nbdev_ch)) {
1607 9 : bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
1608 9 : return;
1609 : }
1610 : }
1611 :
1612 : /* fallthrough */
1613 : default:
1614 3 : spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
1615 3 : bdev_io->u.bdev.accel_sequence = NULL;
1616 3 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1617 3 : break;
1618 : }
1619 :
1620 4 : bio->retry_count = 0;
1621 4 : bio->submit_tsc = 0;
1622 4 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1623 : }
1624 :
1625 : static inline void
1626 4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
1627 : {
1628 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1629 : enum spdk_bdev_io_status io_status;
1630 :
1631 4 : switch (rc) {
1632 1 : case 0:
1633 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1634 1 : break;
1635 0 : case -ENOMEM:
1636 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1637 0 : break;
1638 3 : case -ENXIO:
1639 : /* fallthrough */
1640 : default:
1641 3 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1642 3 : break;
1643 : }
1644 :
1645 4 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1646 4 : }
1647 :
1648 : static void
1649 3 : bdev_nvme_clear_io_path_caches_done(struct nvme_ctrlr *nvme_ctrlr,
1650 : void *ctx, int status)
1651 : {
1652 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1653 :
1654 3 : assert(nvme_ctrlr->io_path_cache_clearing == true);
1655 3 : nvme_ctrlr->io_path_cache_clearing = false;
1656 :
1657 3 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
1658 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1659 3 : return;
1660 : }
1661 :
1662 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1663 :
1664 0 : nvme_ctrlr_unregister(nvme_ctrlr);
1665 : }
1666 :
1667 : static void
1668 408 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
1669 : {
1670 : struct nvme_io_path *io_path;
1671 :
1672 635 : TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
1673 227 : if (io_path->nbdev_ch == NULL) {
1674 68 : continue;
1675 : }
1676 159 : bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
1677 : }
1678 408 : }
1679 :
1680 : static void
1681 1 : bdev_nvme_clear_io_path_cache(struct nvme_ctrlr_channel_iter *i,
1682 : struct nvme_ctrlr *nvme_ctrlr,
1683 : struct nvme_ctrlr_channel *ctrlr_ch,
1684 : void *ctx)
1685 : {
1686 1 : assert(ctrlr_ch->qpair != NULL);
1687 :
1688 1 : _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
1689 :
1690 1 : nvme_ctrlr_for_each_channel_continue(i, 0);
1691 1 : }
1692 :
1693 : static void
1694 3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
1695 : {
1696 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1697 3 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
1698 : nvme_ctrlr->io_path_cache_clearing) {
1699 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1700 0 : return;
1701 : }
1702 :
1703 3 : nvme_ctrlr->io_path_cache_clearing = true;
1704 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1705 :
1706 3 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
1707 : bdev_nvme_clear_io_path_cache,
1708 : NULL,
1709 : bdev_nvme_clear_io_path_caches_done);
1710 : }
1711 :
1712 : static struct nvme_qpair *
1713 117 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
1714 : {
1715 : struct nvme_qpair *nvme_qpair;
1716 :
1717 134 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1718 134 : if (nvme_qpair->qpair == qpair) {
1719 117 : break;
1720 : }
1721 : }
1722 :
1723 117 : return nvme_qpair;
1724 : }
1725 :
1726 : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
1727 :
1728 : static void
1729 117 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
1730 : {
1731 117 : struct nvme_poll_group *group = poll_group_ctx;
1732 : struct nvme_qpair *nvme_qpair;
1733 : struct nvme_ctrlr *nvme_ctrlr;
1734 : struct nvme_ctrlr_channel *ctrlr_ch;
1735 : int status;
1736 :
1737 117 : nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
1738 117 : if (nvme_qpair == NULL) {
1739 0 : return;
1740 : }
1741 :
1742 117 : if (nvme_qpair->qpair != NULL) {
1743 117 : spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
1744 117 : nvme_qpair->qpair = NULL;
1745 : }
1746 :
1747 117 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1748 :
1749 117 : nvme_ctrlr = nvme_qpair->ctrlr;
1750 117 : ctrlr_ch = nvme_qpair->ctrlr_ch;
1751 :
1752 117 : if (ctrlr_ch != NULL) {
1753 72 : if (ctrlr_ch->reset_iter != NULL) {
1754 : /* We are in a full reset sequence. */
1755 67 : if (ctrlr_ch->connect_poller != NULL) {
1756 : /* qpair was failed to connect. Abort the reset sequence. */
1757 0 : NVME_CTRLR_INFOLOG(nvme_ctrlr,
1758 : "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
1759 : qpair);
1760 0 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
1761 0 : status = -1;
1762 : } else {
1763 : /* qpair was completed to disconnect. Just move to the next ctrlr_channel. */
1764 67 : NVME_CTRLR_INFOLOG(nvme_ctrlr,
1765 : "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
1766 : qpair);
1767 67 : status = 0;
1768 : }
1769 67 : nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, status);
1770 67 : ctrlr_ch->reset_iter = NULL;
1771 : } else {
1772 : /* qpair was disconnected unexpectedly. Reset controller for recovery. */
1773 5 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpair %p was disconnected and freed. reset controller.\n",
1774 : qpair);
1775 5 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
1776 : }
1777 : } else {
1778 : /* In this case, ctrlr_channel is already deleted. */
1779 45 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpair %p was disconnected and freed. delete nvme_qpair.\n",
1780 : qpair);
1781 45 : nvme_qpair_delete(nvme_qpair);
1782 : }
1783 : }
1784 :
1785 : static void
1786 0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
1787 : {
1788 : struct nvme_qpair *nvme_qpair;
1789 :
1790 0 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1791 0 : if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
1792 0 : continue;
1793 : }
1794 :
1795 0 : if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
1796 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1797 0 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1798 : }
1799 : }
1800 0 : }
1801 :
1802 : static int
1803 1470 : bdev_nvme_poll(void *arg)
1804 : {
1805 1470 : struct nvme_poll_group *group = arg;
1806 : int64_t num_completions;
1807 :
1808 1470 : if (group->collect_spin_stat && group->start_ticks == 0) {
1809 0 : group->start_ticks = spdk_get_ticks();
1810 : }
1811 :
1812 1470 : num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
1813 : bdev_nvme_disconnected_qpair_cb);
1814 1470 : if (group->collect_spin_stat) {
1815 0 : if (num_completions > 0) {
1816 0 : if (group->end_ticks != 0) {
1817 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
1818 0 : group->end_ticks = 0;
1819 : }
1820 0 : group->start_ticks = 0;
1821 : } else {
1822 0 : group->end_ticks = spdk_get_ticks();
1823 : }
1824 : }
1825 :
1826 1470 : if (spdk_unlikely(num_completions < 0)) {
1827 0 : bdev_nvme_check_io_qpairs(group);
1828 : }
1829 :
1830 1470 : return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
1831 : }
1832 :
1833 : static int bdev_nvme_poll_adminq(void *arg);
1834 :
1835 : static void
1836 140 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
1837 : {
1838 140 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
1839 :
1840 140 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
1841 : nvme_ctrlr, new_period_us);
1842 140 : }
1843 :
1844 : static int
1845 187 : bdev_nvme_poll_adminq(void *arg)
1846 : {
1847 : int32_t rc;
1848 187 : struct nvme_ctrlr *nvme_ctrlr = arg;
1849 : nvme_ctrlr_disconnected_cb disconnected_cb;
1850 :
1851 187 : assert(nvme_ctrlr != NULL);
1852 :
1853 187 : rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
1854 187 : if (rc < 0) {
1855 85 : disconnected_cb = nvme_ctrlr->disconnected_cb;
1856 85 : nvme_ctrlr->disconnected_cb = NULL;
1857 :
1858 85 : if (disconnected_cb != NULL) {
1859 70 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
1860 : g_opts.nvme_adminq_poll_period_us);
1861 70 : disconnected_cb(nvme_ctrlr);
1862 : } else {
1863 15 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
1864 : }
1865 102 : } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
1866 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1867 0 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
1868 : }
1869 :
1870 187 : return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
1871 : }
1872 :
1873 : static void
1874 38 : nvme_bdev_free(void *io_device)
1875 : {
1876 38 : struct nvme_bdev *nvme_disk = io_device;
1877 :
1878 38 : pthread_mutex_destroy(&nvme_disk->mutex);
1879 38 : free(nvme_disk->disk.name);
1880 38 : free(nvme_disk->err_stat);
1881 38 : free(nvme_disk);
1882 38 : }
1883 :
1884 : static int
1885 37 : bdev_nvme_destruct(void *ctx)
1886 : {
1887 37 : struct nvme_bdev *nvme_disk = ctx;
1888 : struct nvme_ns *nvme_ns, *tmp_nvme_ns;
1889 :
1890 : SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);
1891 :
1892 75 : TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
1893 38 : pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
1894 :
1895 38 : nvme_ns->bdev = NULL;
1896 :
1897 38 : assert(nvme_ns->id > 0);
1898 :
1899 38 : if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
1900 0 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1901 :
1902 0 : nvme_ctrlr_release(nvme_ns->ctrlr);
1903 0 : nvme_ns_free(nvme_ns);
1904 : } else {
1905 38 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1906 : }
1907 : }
1908 :
1909 37 : pthread_mutex_lock(&g_bdev_nvme_mutex);
1910 37 : TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
1911 37 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
1912 :
1913 37 : spdk_io_device_unregister(nvme_disk, nvme_bdev_free);
1914 :
1915 37 : return 0;
1916 : }
1917 :
1918 : static int
1919 118 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
1920 : {
1921 : struct nvme_ctrlr *nvme_ctrlr;
1922 118 : struct spdk_nvme_io_qpair_opts opts;
1923 : struct spdk_nvme_qpair *qpair;
1924 : int rc;
1925 :
1926 118 : nvme_ctrlr = nvme_qpair->ctrlr;
1927 :
1928 118 : spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1929 118 : opts.delay_cmd_submit = g_opts.delay_cmd_submit;
1930 118 : opts.create_only = true;
1931 118 : opts.async_mode = true;
1932 118 : opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
1933 118 : g_opts.io_queue_requests = opts.io_queue_requests;
1934 :
1935 118 : qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1936 118 : if (qpair == NULL) {
1937 0 : return -1;
1938 : }
1939 :
1940 : SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
1941 : spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
1942 :
1943 118 : assert(nvme_qpair->group != NULL);
1944 :
1945 118 : rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
1946 118 : if (rc != 0) {
1947 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to begin polling on NVMe Channel.\n");
1948 0 : goto err;
1949 : }
1950 :
1951 118 : rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
1952 118 : if (rc != 0) {
1953 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to connect I/O qpair.\n");
1954 0 : goto err;
1955 : }
1956 :
1957 118 : nvme_qpair->qpair = qpair;
1958 :
1959 118 : if (!g_opts.disable_auto_failback) {
1960 85 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1961 : }
1962 :
1963 118 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Connecting qpair %p:%u started.\n",
1964 : qpair, spdk_nvme_qpair_get_id(qpair));
1965 :
1966 118 : return 0;
1967 :
1968 0 : err:
1969 0 : spdk_nvme_ctrlr_free_io_qpair(qpair);
1970 :
1971 0 : return rc;
1972 : }
1973 :
1974 : static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);
1975 :
1976 : static void
1977 122 : bdev_nvme_complete_pending_resets(struct nvme_ctrlr_channel_iter *i,
1978 : struct nvme_ctrlr *nvme_ctrlr,
1979 : struct nvme_ctrlr_channel *ctrlr_ch,
1980 : void *ctx)
1981 : {
1982 122 : int rc = 0;
1983 : struct nvme_bdev_io *bio;
1984 :
1985 122 : if (ctx != NULL) {
1986 59 : rc = -1;
1987 : }
1988 :
1989 133 : while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
1990 11 : bio = TAILQ_FIRST(&ctrlr_ch->pending_resets);
1991 11 : TAILQ_REMOVE(&ctrlr_ch->pending_resets, bio, retry_link);
1992 :
1993 11 : bdev_nvme_reset_io_continue(bio, rc);
1994 : }
1995 :
1996 122 : nvme_ctrlr_for_each_channel_continue(i, 0);
1997 122 : }
1998 :
1999 : /* This function marks the current trid as failed by storing the current ticks
2000 : * and then sets the next trid to the active trid within a controller if exists.
2001 : *
2002 : * The purpose of the boolean return value is to request the caller to disconnect
2003 : * the current trid now to try connecting the next trid.
2004 : */
2005 : static bool
2006 61 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
2007 : {
2008 : struct nvme_path_id *path_id, *next_path;
2009 : int rc __attribute__((unused));
2010 :
2011 61 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
2012 61 : assert(path_id);
2013 61 : assert(path_id == nvme_ctrlr->active_path_id);
2014 61 : next_path = TAILQ_NEXT(path_id, link);
2015 :
2016 : /* Update the last failed time. It means the trid is failed if its last
2017 : * failed time is non-zero.
2018 : */
2019 61 : path_id->last_failed_tsc = spdk_get_ticks();
2020 :
2021 61 : if (next_path == NULL) {
2022 : /* There is no alternate trid within a controller. */
2023 50 : return false;
2024 : }
2025 :
2026 11 : if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
2027 : /* Connect is not retried in a controller reset sequence. Connecting
2028 : * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
2029 : */
2030 3 : return false;
2031 : }
2032 :
2033 8 : assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
2034 :
2035 8 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Start failover from %s:%s to %s:%s\n",
2036 : path_id->trid.traddr, path_id->trid.trsvcid,
2037 : next_path->trid.traddr, next_path->trid.trsvcid);
2038 :
2039 8 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
2040 8 : nvme_ctrlr->active_path_id = next_path;
2041 8 : rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
2042 8 : assert(rc == 0);
2043 8 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
2044 8 : if (!remove) {
2045 : /** Shuffle the old trid to the end of the list and use the new one.
2046 : * Allows for round robin through multiple connections.
2047 : */
2048 6 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
2049 : } else {
2050 2 : free(path_id);
2051 : }
2052 :
2053 8 : if (start || next_path->last_failed_tsc == 0) {
2054 : /* bdev_nvme_failover_ctrlr() is just called or the next trid is not failed
2055 : * or used yet. Try the next trid now.
2056 : */
2057 7 : return true;
2058 : }
2059 :
2060 1 : if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
2061 1 : nvme_ctrlr->opts.reconnect_delay_sec) {
2062 : /* Enough backoff passed since the next trid failed. Try the next trid now. */
2063 0 : return true;
2064 : }
2065 :
2066 : /* The next trid will be tried after reconnect_delay_sec seconds. */
2067 1 : return false;
2068 : }
2069 :
2070 : static bool
2071 88 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
2072 : {
2073 : int32_t elapsed;
2074 :
2075 88 : if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
2076 37 : nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
2077 62 : return false;
2078 : }
2079 :
2080 26 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
2081 26 : if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
2082 6 : return true;
2083 : } else {
2084 20 : return false;
2085 : }
2086 : }
2087 :
2088 : static bool
2089 12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
2090 : {
2091 : uint32_t elapsed;
2092 :
2093 12 : if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
2094 8 : return false;
2095 : }
2096 :
2097 4 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
2098 4 : if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
2099 2 : return true;
2100 : } else {
2101 2 : return false;
2102 : }
2103 : }
2104 :
2105 : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
2106 :
2107 : static void
2108 71 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
2109 : {
2110 : int rc;
2111 :
2112 71 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start disconnecting ctrlr.\n");
2113 :
2114 71 : rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
2115 71 : if (rc != 0) {
2116 1 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "disconnecting ctrlr failed.\n");
2117 :
2118 : /* Disconnect fails if ctrlr is already resetting or removed. In this case,
2119 : * fail the reset sequence immediately.
2120 : */
2121 1 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2122 1 : return;
2123 : }
2124 :
2125 : /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
2126 : * Set callback here to execute the specified operation after ctrlr is really disconnected.
2127 : */
2128 70 : assert(nvme_ctrlr->disconnected_cb == NULL);
2129 70 : nvme_ctrlr->disconnected_cb = cb_fn;
2130 :
2131 : /* During disconnection, reduce the period to poll adminq more often. */
2132 70 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
2133 : }
2134 :
/* Follow-up action chosen by bdev_nvme_check_op_after_reset() once a reset or
 * disable sequence has finished.
 */
typedef enum bdev_nvme_op_after_reset {
	/* Nothing further to do. */
	OP_NONE,
	/* A destruct was pending and can now be completed. */
	OP_COMPLETE_PENDING_DESTRUCT,
	/* Delete the controller. */
	OP_DESTRUCT,
	/* Disconnect, then retry the connection after the reconnect delay. */
	OP_DELAYED_RECONNECT,
	/* Run a failover that was requested while the reset was in progress. */
	OP_FAILOVER,
} _bdev_nvme_op_after_reset;
2144 :
2145 : static _bdev_nvme_op_after_reset
2146 70 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
2147 : {
2148 70 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
2149 : /* Complete pending destruct after reset completes. */
2150 0 : return OP_COMPLETE_PENDING_DESTRUCT;
2151 70 : } else if (nvme_ctrlr->pending_failover) {
2152 3 : nvme_ctrlr->pending_failover = false;
2153 3 : nvme_ctrlr->reset_start_tsc = 0;
2154 3 : return OP_FAILOVER;
2155 67 : } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
2156 53 : nvme_ctrlr->reset_start_tsc = 0;
2157 53 : return OP_NONE;
2158 14 : } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
2159 2 : return OP_DESTRUCT;
2160 : } else {
2161 12 : if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
2162 2 : nvme_ctrlr->fast_io_fail_timedout = true;
2163 : }
2164 12 : return OP_DELAYED_RECONNECT;
2165 : }
2166 : }
2167 :
2168 : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
2169 : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
2170 :
2171 : static int
2172 9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
2173 : {
2174 9 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2175 :
2176 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
2177 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2178 :
2179 9 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2180 :
2181 9 : if (!nvme_ctrlr->reconnect_is_delayed) {
2182 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2183 0 : return SPDK_POLLER_BUSY;
2184 : }
2185 :
2186 9 : nvme_ctrlr->reconnect_is_delayed = false;
2187 :
2188 9 : if (nvme_ctrlr->destruct) {
2189 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2190 0 : return SPDK_POLLER_BUSY;
2191 : }
2192 :
2193 9 : assert(nvme_ctrlr->resetting == false);
2194 9 : nvme_ctrlr->resetting = true;
2195 :
2196 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2197 :
2198 9 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2199 :
2200 9 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2201 9 : return SPDK_POLLER_BUSY;
2202 : }
2203 :
2204 : static void
2205 12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
2206 : {
2207 12 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2208 :
2209 12 : assert(nvme_ctrlr->reconnect_is_delayed == false);
2210 12 : nvme_ctrlr->reconnect_is_delayed = true;
2211 :
2212 12 : assert(nvme_ctrlr->reconnect_delay_timer == NULL);
2213 12 : nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
2214 : nvme_ctrlr,
2215 : nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
2216 12 : }
2217 :
2218 : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
2219 :
2220 : static void
2221 68 : _bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
2222 : {
2223 68 : bool success = (ctx == NULL);
2224 68 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2225 68 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2226 : enum bdev_nvme_op_after_reset op_after_reset;
2227 :
2228 68 : assert(nvme_ctrlr->thread == spdk_get_thread());
2229 :
2230 68 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2231 68 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2232 :
2233 68 : if (!success) {
2234 33 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Resetting controller failed.\n");
2235 : } else {
2236 35 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Resetting controller successful.\n");
2237 : }
2238 :
2239 68 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2240 68 : nvme_ctrlr->resetting = false;
2241 68 : nvme_ctrlr->dont_retry = false;
2242 68 : nvme_ctrlr->in_failover = false;
2243 :
2244 68 : op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
2245 68 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2246 :
2247 : /* Delay callbacks when the next operation is a failover. */
2248 68 : if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
2249 17 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
2250 : }
2251 :
2252 68 : switch (op_after_reset) {
2253 0 : case OP_COMPLETE_PENDING_DESTRUCT:
2254 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2255 0 : break;
2256 2 : case OP_DESTRUCT:
2257 2 : bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
2258 2 : remove_discovery_entry(nvme_ctrlr);
2259 2 : break;
2260 12 : case OP_DELAYED_RECONNECT:
2261 12 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
2262 12 : break;
2263 3 : case OP_FAILOVER:
2264 3 : nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
2265 3 : nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
2266 3 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
2267 3 : break;
2268 51 : default:
2269 51 : break;
2270 : }
2271 68 : }
2272 :
2273 : static void
2274 70 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
2275 : {
2276 70 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2277 70 : if (!success) {
2278 : /* Connecting the active trid failed. Set the next alternate trid to the
2279 : * active trid if it exists.
2280 : */
2281 35 : if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
2282 : /* The next alternate trid exists and is ready to try. Try it now. */
2283 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2284 :
2285 2 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Try the next alternate trid %s:%s now.\n",
2286 : nvme_ctrlr->active_path_id->trid.traddr,
2287 : nvme_ctrlr->active_path_id->trid.trsvcid);
2288 :
2289 2 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2290 2 : return;
2291 : }
2292 :
2293 : /* We came here if there is no alternate trid or if the next trid exists but
2294 : * is not ready to try. We will try the active trid after reconnect_delay_sec
2295 : * seconds if it is non-zero or at the next reset call otherwise.
2296 : */
2297 : } else {
2298 : /* Connecting the active trid succeeded. Clear the last failed time because it
2299 : * means the trid is failed if its last failed time is non-zero.
2300 : */
2301 35 : nvme_ctrlr->active_path_id->last_failed_tsc = 0;
2302 : }
2303 68 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2304 :
2305 68 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Clear pending resets.\n");
2306 :
2307 : /* Make sure we clear any pending resets before returning. */
2308 68 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2309 : bdev_nvme_complete_pending_resets,
2310 : success ? NULL : (void *)0x1,
2311 : _bdev_nvme_reset_ctrlr_complete);
2312 : }
2313 :
/*
 * Completion of the qpair cleanup that follows a failed qpair re-creation:
 * report the reset as failed. ctx and status are ignored.
 */
static void
bdev_nvme_reset_create_qpairs_failed(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
{
	const bool success = false;

	bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, success);
}
2319 :
2320 : static void
2321 102 : bdev_nvme_reset_destroy_qpair(struct nvme_ctrlr_channel_iter *i,
2322 : struct nvme_ctrlr *nvme_ctrlr,
2323 : struct nvme_ctrlr_channel *ctrlr_ch, void *ctx)
2324 : {
2325 : struct nvme_qpair *nvme_qpair;
2326 : struct spdk_nvme_qpair *qpair;
2327 :
2328 102 : nvme_qpair = ctrlr_ch->qpair;
2329 102 : assert(nvme_qpair != NULL);
2330 :
2331 102 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
2332 :
2333 102 : qpair = nvme_qpair->qpair;
2334 102 : if (qpair != NULL) {
2335 67 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start disconnecting qpair %p:%u.\n",
2336 : qpair, spdk_nvme_qpair_get_id(qpair));
2337 :
2338 67 : if (nvme_qpair->ctrlr->dont_retry) {
2339 53 : spdk_nvme_qpair_set_abort_dnr(qpair, true);
2340 : }
2341 67 : spdk_nvme_ctrlr_disconnect_io_qpair(qpair);
2342 :
2343 : /* The current full reset sequence will move to the next
2344 : * ctrlr_channel after the qpair is actually disconnected.
2345 : */
2346 67 : assert(ctrlr_ch->reset_iter == NULL);
2347 67 : ctrlr_ch->reset_iter = i;
2348 : } else {
2349 35 : nvme_ctrlr_for_each_channel_continue(i, 0);
2350 : }
2351 102 : }
2352 :
2353 : static void
2354 35 : bdev_nvme_reset_create_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
2355 : {
2356 35 : if (status == 0) {
2357 35 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were created after ctrlr reset.\n");
2358 :
2359 35 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
2360 : } else {
2361 0 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were failed to create after ctrlr reset.\n");
2362 :
2363 : /* Delete the added qpairs and quiesce ctrlr to make the states clean. */
2364 0 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2365 : bdev_nvme_reset_destroy_qpair,
2366 : NULL,
2367 : bdev_nvme_reset_create_qpairs_failed);
2368 : }
2369 35 : }
2370 :
2371 : static int
2372 59 : bdev_nvme_reset_check_qpair_connected(void *ctx)
2373 : {
2374 59 : struct nvme_ctrlr_channel *ctrlr_ch = ctx;
2375 59 : struct nvme_qpair *nvme_qpair = ctrlr_ch->qpair;
2376 : struct spdk_nvme_qpair *qpair;
2377 :
2378 59 : if (ctrlr_ch->reset_iter == NULL) {
2379 : /* qpair was already failed to connect and the reset sequence is being aborted. */
2380 0 : assert(ctrlr_ch->connect_poller == NULL);
2381 0 : assert(nvme_qpair->qpair == NULL);
2382 :
2383 0 : NVME_CTRLR_INFOLOG(nvme_qpair->ctrlr,
2384 : "qpair was already failed to connect. reset is being aborted.\n");
2385 0 : return SPDK_POLLER_BUSY;
2386 : }
2387 :
2388 59 : qpair = nvme_qpair->qpair;
2389 59 : assert(qpair != NULL);
2390 :
2391 59 : if (!spdk_nvme_qpair_is_connected(qpair)) {
2392 0 : return SPDK_POLLER_BUSY;
2393 : }
2394 :
2395 59 : NVME_CTRLR_INFOLOG(nvme_qpair->ctrlr, "qpair %p:%u was connected.\n",
2396 : qpair, spdk_nvme_qpair_get_id(qpair));
2397 :
2398 59 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
2399 :
2400 : /* qpair was completed to connect. Move to the next ctrlr_channel */
2401 59 : nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
2402 59 : ctrlr_ch->reset_iter = NULL;
2403 :
2404 59 : if (!g_opts.disable_auto_failback) {
2405 44 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
2406 : }
2407 :
2408 59 : return SPDK_POLLER_BUSY;
2409 : }
2410 :
2411 : static void
2412 59 : bdev_nvme_reset_create_qpair(struct nvme_ctrlr_channel_iter *i,
2413 : struct nvme_ctrlr *nvme_ctrlr,
2414 : struct nvme_ctrlr_channel *ctrlr_ch,
2415 : void *ctx)
2416 : {
2417 59 : struct nvme_qpair *nvme_qpair = ctrlr_ch->qpair;
2418 : struct spdk_nvme_qpair *qpair;
2419 : int rc;
2420 :
2421 59 : rc = bdev_nvme_create_qpair(nvme_qpair);
2422 59 : if (rc == 0) {
2423 59 : ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
2424 : ctrlr_ch, 0);
2425 :
2426 59 : qpair = nvme_qpair->qpair;
2427 :
2428 59 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start checking qpair %p:%u to be connected.\n",
2429 : qpair, spdk_nvme_qpair_get_id(qpair));
2430 :
2431 : /* The current full reset sequence will move to the next
2432 : * ctrlr_channel after the qpair is actually connected.
2433 : */
2434 59 : assert(ctrlr_ch->reset_iter == NULL);
2435 59 : ctrlr_ch->reset_iter = i;
2436 : } else {
2437 0 : nvme_ctrlr_for_each_channel_continue(i, rc);
2438 : }
2439 59 : }
2440 :
2441 : static void
2442 35 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
2443 : {
2444 35 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
2445 : struct nvme_ns *nvme_ns;
2446 :
2447 35 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
2448 55 : nvme_ns != NULL;
2449 20 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
2450 20 : if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
2451 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
2452 : /* NS can be added again. Just nullify nvme_ns->ns. */
2453 1 : nvme_ns->ns = NULL;
2454 : }
2455 : }
2456 35 : }
2457 :
2458 :
2459 : static int
2460 69 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
2461 : {
2462 69 : struct nvme_ctrlr *nvme_ctrlr = arg;
2463 : struct spdk_nvme_transport_id *trid;
2464 69 : int rc = -ETIMEDOUT;
2465 :
2466 69 : if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
2467 : /* Mark the ctrlr as failed. The next call to
2468 : * spdk_nvme_ctrlr_reconnect_poll_async() will then
2469 : * do the necessary cleanup and return failure.
2470 : */
2471 2 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
2472 : }
2473 :
2474 69 : rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
2475 69 : if (rc == -EAGAIN) {
2476 0 : return SPDK_POLLER_BUSY;
2477 : }
2478 :
2479 69 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
2480 69 : if (rc == 0) {
2481 35 : trid = &nvme_ctrlr->active_path_id->trid;
2482 :
2483 35 : if (spdk_nvme_trtype_is_fabrics(trid->trtype)) {
2484 35 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was connected to %s:%s. Create qpairs.\n",
2485 : trid->traddr, trid->trsvcid);
2486 : } else {
2487 0 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was connected. Create qpairs.\n");
2488 : }
2489 :
2490 35 : nvme_ctrlr_check_namespaces(nvme_ctrlr);
2491 :
2492 : /* Recreate all of the I/O queue pairs */
2493 35 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2494 : bdev_nvme_reset_create_qpair,
2495 : NULL,
2496 : bdev_nvme_reset_create_qpairs_done);
2497 : } else {
2498 34 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr could not be connected.\n");
2499 :
2500 34 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2501 : }
2502 69 : return SPDK_POLLER_BUSY;
2503 : }
2504 :
2505 : static void
2506 69 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2507 : {
2508 69 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start reconnecting ctrlr.\n");
2509 :
2510 69 : spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
2511 :
2512 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
2513 69 : assert(nvme_ctrlr->reset_detach_poller == NULL);
2514 69 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
2515 : nvme_ctrlr, 0);
2516 69 : }
2517 :
2518 : static void
2519 56 : bdev_nvme_reset_destroy_qpair_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
2520 : {
2521 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
2522 56 : assert(status == 0);
2523 :
2524 56 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were deleted.\n");
2525 :
2526 56 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2527 0 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2528 : } else {
2529 56 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2530 : }
2531 56 : }
2532 :
2533 : static void
2534 56 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2535 : {
2536 56 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Delete qpairs for reset.\n");
2537 :
2538 56 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2539 : bdev_nvme_reset_destroy_qpair,
2540 : NULL,
2541 : bdev_nvme_reset_destroy_qpair_done);
2542 56 : }
2543 :
2544 : static void
2545 3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
2546 : {
2547 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2548 :
2549 3 : assert(nvme_ctrlr->resetting == true);
2550 3 : assert(nvme_ctrlr->thread == spdk_get_thread());
2551 :
2552 3 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2553 :
2554 3 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2555 :
2556 3 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2557 3 : }
2558 :
2559 : static void
2560 56 : _bdev_nvme_reset_ctrlr(void *ctx)
2561 : {
2562 56 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2563 :
2564 56 : assert(nvme_ctrlr->resetting == true);
2565 56 : assert(nvme_ctrlr->thread == spdk_get_thread());
2566 :
2567 56 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2568 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
2569 : } else {
2570 56 : bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
2571 : }
2572 56 : }
2573 :
2574 : static int
2575 49 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2576 : {
2577 : spdk_msg_fn msg_fn;
2578 :
2579 49 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2580 49 : if (nvme_ctrlr->destruct) {
2581 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2582 3 : return -ENXIO;
2583 : }
2584 :
2585 46 : if (nvme_ctrlr->resetting) {
2586 13 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2587 13 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform reset, already in progress.\n");
2588 13 : return -EBUSY;
2589 : }
2590 :
2591 33 : if (nvme_ctrlr->disabled) {
2592 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2593 1 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform reset. Controller is disabled.\n");
2594 1 : return -EALREADY;
2595 : }
2596 :
2597 32 : nvme_ctrlr->resetting = true;
2598 32 : nvme_ctrlr->dont_retry = true;
2599 :
2600 32 : if (nvme_ctrlr->reconnect_is_delayed) {
2601 1 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Reconnect is already scheduled.\n");
2602 1 : msg_fn = bdev_nvme_reconnect_ctrlr_now;
2603 1 : nvme_ctrlr->reconnect_is_delayed = false;
2604 : } else {
2605 31 : msg_fn = _bdev_nvme_reset_ctrlr;
2606 31 : assert(nvme_ctrlr->reset_start_tsc == 0);
2607 : }
2608 :
2609 32 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2610 :
2611 32 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2612 :
2613 32 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2614 32 : return 0;
2615 : }
2616 :
2617 : static int
2618 3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2619 : {
2620 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2621 3 : if (nvme_ctrlr->destruct) {
2622 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2623 0 : return -ENXIO;
2624 : }
2625 :
2626 3 : if (nvme_ctrlr->resetting) {
2627 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2628 0 : return -EBUSY;
2629 : }
2630 :
2631 3 : if (!nvme_ctrlr->disabled) {
2632 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2633 1 : return -EALREADY;
2634 : }
2635 :
2636 2 : nvme_ctrlr->disabled = false;
2637 2 : nvme_ctrlr->resetting = true;
2638 :
2639 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2640 :
2641 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2642 :
2643 2 : spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
2644 2 : return 0;
2645 : }
2646 :
2647 : static void
2648 2 : _bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
2649 : {
2650 2 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2651 2 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2652 : enum bdev_nvme_op_after_reset op_after_disable;
2653 :
2654 2 : assert(nvme_ctrlr->thread == spdk_get_thread());
2655 :
2656 2 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2657 2 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2658 :
2659 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2660 :
2661 2 : nvme_ctrlr->resetting = false;
2662 2 : nvme_ctrlr->dont_retry = false;
2663 :
2664 2 : op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
2665 :
2666 2 : nvme_ctrlr->disabled = true;
2667 2 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2668 :
2669 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2670 :
2671 2 : if (ctrlr_op_cb_fn) {
2672 0 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
2673 : }
2674 :
2675 2 : switch (op_after_disable) {
2676 0 : case OP_COMPLETE_PENDING_DESTRUCT:
2677 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2678 0 : break;
2679 2 : default:
2680 2 : break;
2681 : }
2682 :
2683 2 : }
2684 :
2685 : static void
2686 2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
2687 : {
2688 : /* Make sure we clear any pending resets before returning. */
2689 2 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2690 : bdev_nvme_complete_pending_resets,
2691 : NULL,
2692 : _bdev_nvme_disable_ctrlr_complete);
2693 2 : }
2694 :
2695 : static void
2696 1 : bdev_nvme_disable_destroy_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
2697 : {
2698 1 : assert(status == 0);
2699 :
2700 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2701 0 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2702 : } else {
2703 1 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
2704 : }
2705 1 : }
2706 :
2707 : static void
2708 1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2709 : {
2710 1 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2711 : bdev_nvme_reset_destroy_qpair,
2712 : NULL,
2713 : bdev_nvme_disable_destroy_qpairs_done);
2714 1 : }
2715 :
2716 : static void
2717 1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
2718 : {
2719 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2720 :
2721 1 : assert(nvme_ctrlr->resetting == true);
2722 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2723 :
2724 1 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2725 :
2726 1 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2727 1 : }
2728 :
2729 : static void
2730 1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
2731 : {
2732 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2733 :
2734 1 : assert(nvme_ctrlr->resetting == true);
2735 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2736 :
2737 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2738 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
2739 : } else {
2740 1 : bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
2741 : }
2742 1 : }
2743 :
2744 : static int
2745 5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2746 : {
2747 : spdk_msg_fn msg_fn;
2748 :
2749 5 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2750 5 : if (nvme_ctrlr->destruct) {
2751 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2752 1 : return -ENXIO;
2753 : }
2754 :
2755 4 : if (nvme_ctrlr->resetting) {
2756 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2757 1 : return -EBUSY;
2758 : }
2759 :
2760 3 : if (nvme_ctrlr->disabled) {
2761 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2762 1 : return -EALREADY;
2763 : }
2764 :
2765 2 : nvme_ctrlr->resetting = true;
2766 2 : nvme_ctrlr->dont_retry = true;
2767 :
2768 2 : if (nvme_ctrlr->reconnect_is_delayed) {
2769 1 : msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
2770 1 : nvme_ctrlr->reconnect_is_delayed = false;
2771 : } else {
2772 1 : msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
2773 : }
2774 :
2775 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2776 :
2777 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2778 :
2779 2 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2780 2 : return 0;
2781 : }
2782 :
2783 : static int
2784 31 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2785 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2786 : {
2787 : int rc;
2788 :
2789 31 : switch (op) {
2790 30 : case NVME_CTRLR_OP_RESET:
2791 30 : rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
2792 30 : break;
2793 0 : case NVME_CTRLR_OP_ENABLE:
2794 0 : rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
2795 0 : break;
2796 0 : case NVME_CTRLR_OP_DISABLE:
2797 0 : rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
2798 0 : break;
2799 1 : default:
2800 1 : rc = -EINVAL;
2801 1 : break;
2802 : }
2803 :
2804 31 : if (rc == 0) {
2805 16 : assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
2806 16 : assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
2807 16 : nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
2808 16 : nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
2809 : }
2810 31 : return rc;
2811 : }
2812 :
/* Context carried through a controller operation started by nvme_ctrlr_op_rpc()
 * or nvme_bdev_ctrlr_op_rpc(). It is allocated with calloc() by those functions
 * and freed after cb_fn has been invoked.
 */
2813 : struct nvme_ctrlr_op_rpc_ctx {
2814 : struct nvme_ctrlr *nvme_ctrlr; /* Controller last handled by nvme_bdev_ctrlr_op_rpc(); its successor is tried next. */
2815 : struct spdk_thread *orig_thread; /* Thread that started the operation; completion messages are sent to it. */
2816 : enum nvme_ctrlr_op op; /* Operation to repeat on following controllers (set by nvme_bdev_ctrlr_op_rpc()). */
2817 : int rc; /* Status recorded by the completion callbacks and passed to cb_fn. */
2818 : bdev_nvme_ctrlr_op_cb cb_fn; /* Caller's completion callback. */
2819 : void *cb_arg; /* Argument passed to cb_fn. */
2820 : };
2821 :
2822 : static void
2823 4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
2824 : {
2825 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2826 :
2827 4 : assert(ctx != NULL);
2828 4 : assert(ctx->cb_fn != NULL);
2829 :
2830 4 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2831 :
2832 4 : free(ctx);
2833 4 : }
2834 :
2835 : static void
2836 4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
2837 : {
2838 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2839 :
2840 4 : ctx->rc = rc;
2841 :
2842 4 : spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
2843 4 : }
2844 :
2845 : void
2846 4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2847 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2848 : {
2849 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2850 : int rc;
2851 :
2852 4 : assert(cb_fn != NULL);
2853 :
2854 4 : ctx = calloc(1, sizeof(*ctx));
2855 4 : if (ctx == NULL) {
2856 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2857 0 : cb_fn(cb_arg, -ENOMEM);
2858 0 : return;
2859 : }
2860 :
2861 4 : ctx->orig_thread = spdk_get_thread();
2862 4 : ctx->cb_fn = cb_fn;
2863 4 : ctx->cb_arg = cb_arg;
2864 :
2865 4 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
2866 4 : if (rc == 0) {
2867 1 : return;
2868 3 : } else if (rc == -EALREADY) {
2869 0 : rc = 0;
2870 : }
2871 :
2872 3 : nvme_ctrlr_op_rpc_complete(ctx, rc);
2873 : }
2874 :
2875 : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
2876 :
2877 : static void
2878 2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
2879 : {
2880 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2881 : struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
2882 : int rc;
2883 :
2884 2 : prev_nvme_ctrlr = ctx->nvme_ctrlr;
2885 2 : ctx->nvme_ctrlr = NULL;
2886 :
2887 2 : if (ctx->rc != 0) {
2888 0 : goto complete;
2889 : }
2890 :
2891 2 : next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
2892 2 : if (next_nvme_ctrlr == NULL) {
2893 1 : goto complete;
2894 : }
2895 :
2896 1 : rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2897 1 : if (rc == 0) {
2898 1 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2899 1 : return;
2900 0 : } else if (rc == -EALREADY) {
2901 0 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2902 0 : rc = 0;
2903 : }
2904 :
2905 0 : ctx->rc = rc;
2906 :
2907 1 : complete:
2908 1 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2909 1 : free(ctx);
2910 : }
2911 :
2912 : static void
2913 2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
2914 : {
2915 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2916 :
2917 2 : ctx->rc = rc;
2918 :
2919 2 : spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
2920 2 : }
2921 :
2922 : void
2923 1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
2924 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2925 : {
2926 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2927 : struct nvme_ctrlr *nvme_ctrlr;
2928 : int rc;
2929 :
2930 1 : assert(cb_fn != NULL);
2931 :
2932 1 : ctx = calloc(1, sizeof(*ctx));
2933 1 : if (ctx == NULL) {
2934 0 : SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2935 0 : cb_fn(cb_arg, -ENOMEM);
2936 0 : return;
2937 : }
2938 :
2939 1 : ctx->orig_thread = spdk_get_thread();
2940 1 : ctx->op = op;
2941 1 : ctx->cb_fn = cb_fn;
2942 1 : ctx->cb_arg = cb_arg;
2943 :
2944 1 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
2945 1 : assert(nvme_ctrlr != NULL);
2946 :
2947 1 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2948 1 : if (rc == 0) {
2949 1 : ctx->nvme_ctrlr = nvme_ctrlr;
2950 1 : return;
2951 0 : } else if (rc == -EALREADY) {
2952 0 : ctx->nvme_ctrlr = nvme_ctrlr;
2953 0 : rc = 0;
2954 : }
2955 :
2956 0 : nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
2957 : }
2958 :
2959 : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
2960 :
2961 : static void
2962 15 : bdev_nvme_unfreeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
2963 : {
2964 15 : struct nvme_bdev_io *bio = ctx;
2965 : enum spdk_bdev_io_status io_status;
2966 :
2967 15 : if (bio->cpl.cdw0 == 0) {
2968 11 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
2969 : } else {
2970 4 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
2971 : }
2972 :
2973 15 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p completed, status:%d\n", bio, io_status);
2974 :
2975 15 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
2976 15 : }
2977 :
2978 : static void
2979 30 : bdev_nvme_unfreeze_bdev_channel(struct nvme_bdev_channel_iter *i,
2980 : struct nvme_bdev *nbdev,
2981 : struct nvme_bdev_channel *nbdev_ch, void *ctx)
2982 : {
2983 30 : bdev_nvme_abort_retry_ios(nbdev_ch);
2984 30 : nbdev_ch->resetting = false;
2985 :
2986 30 : nvme_bdev_for_each_channel_continue(i, 0);
2987 30 : }
2988 :
2989 : static void
2990 15 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
2991 : {
2992 15 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
2993 15 : struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
2994 :
2995 : /* Abort all queued I/Os for retry. */
2996 15 : nvme_bdev_for_each_channel(nbdev,
2997 : bdev_nvme_unfreeze_bdev_channel,
2998 : bio,
2999 : bdev_nvme_unfreeze_bdev_channel_done);
3000 15 : }
3001 :
3002 : static void
3003 25 : _bdev_nvme_reset_io_continue(void *ctx)
3004 : {
3005 25 : struct nvme_bdev_io *bio = ctx;
3006 : struct nvme_io_path *prev_io_path, *next_io_path;
3007 : int rc;
3008 :
3009 25 : prev_io_path = bio->io_path;
3010 25 : bio->io_path = NULL;
3011 :
3012 25 : next_io_path = STAILQ_NEXT(prev_io_path, stailq);
3013 25 : if (next_io_path == NULL) {
3014 15 : goto complete;
3015 : }
3016 :
3017 10 : rc = _bdev_nvme_reset_io(next_io_path, bio);
3018 10 : if (rc == 0) {
3019 10 : return;
3020 : }
3021 :
3022 0 : complete:
3023 15 : bdev_nvme_reset_io_complete(bio);
3024 : }
3025 :
3026 : static void
3027 25 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
3028 : {
3029 25 : struct nvme_bdev_io *bio = cb_arg;
3030 25 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
3031 25 : struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
3032 :
3033 25 : NVME_BDEV_INFOLOG(nbdev, "continue reset_io %p, rc:%d\n", bio, rc);
3034 :
3035 : /* Reset status is initialized as "failed". Set to "success" once we have at least one
3036 : * successfully reset nvme_ctrlr.
3037 : */
3038 25 : if (rc == 0) {
3039 15 : bio->cpl.cdw0 = 0;
3040 : }
3041 :
3042 25 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
3043 25 : }
3044 :
3045 : static int
3046 25 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
3047 : {
3048 25 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
3049 25 : struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
3050 25 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
3051 : struct nvme_ctrlr_channel *ctrlr_ch;
3052 : int rc;
3053 :
3054 25 : assert(bio->io_path == NULL);
3055 25 : bio->io_path = io_path;
3056 :
3057 25 : rc = nvme_ctrlr_op(nvme_ctrlr, NVME_CTRLR_OP_RESET,
3058 : bdev_nvme_reset_io_continue, bio);
3059 :
3060 25 : if (rc == 0) {
3061 13 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p started resetting ctrlr [%s, %u].\n",
3062 : bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr));
3063 12 : } else if (rc == -EBUSY) {
3064 11 : ctrlr_ch = io_path->qpair->ctrlr_ch;
3065 11 : assert(ctrlr_ch != NULL);
3066 : /*
3067 : * Reset call is queued only if it is from the app framework. This is on purpose so that
3068 : * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
3069 : * upper level. If they are in the middle of a reset, we won't try to schedule another one.
3070 : */
3071 11 : TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bio, retry_link);
3072 :
3073 11 : rc = 0;
3074 :
3075 11 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p was queued to ctrlr [%s, %u].\n",
3076 : bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr));
3077 : } else {
3078 1 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p could not reset ctrlr [%s, %u], rc:%d\n",
3079 : bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr), rc);
3080 : }
3081 :
3082 25 : return rc;
3083 : }
3084 :
3085 : static void
3086 15 : bdev_nvme_freeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
3087 : {
3088 15 : struct nvme_bdev_io *bio = ctx;
3089 15 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
3090 : struct nvme_bdev_channel *nbdev_ch;
3091 : struct nvme_io_path *io_path;
3092 : int rc;
3093 :
3094 15 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
3095 :
3096 : /* Initialize with failed status. With multipath it is enough to have at least one successful
3097 : * nvme_ctrlr reset. If there is none, reset status will remain failed.
3098 : */
3099 15 : bio->cpl.cdw0 = 1;
3100 :
3101 : /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
3102 15 : io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
3103 15 : assert(io_path != NULL);
3104 :
3105 15 : rc = _bdev_nvme_reset_io(io_path, bio);
3106 15 : if (rc != 0) {
3107 : /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
3108 1 : rc = (rc == -EALREADY) ? 0 : rc;
3109 :
3110 1 : bdev_nvme_reset_io_continue(bio, rc);
3111 : }
3112 15 : }
3113 :
3114 : static void
3115 30 : bdev_nvme_freeze_bdev_channel(struct nvme_bdev_channel_iter *i,
3116 : struct nvme_bdev *nbdev,
3117 : struct nvme_bdev_channel *nbdev_ch, void *ctx)
3118 : {
3119 30 : nbdev_ch->resetting = true;
3120 :
3121 30 : nvme_bdev_for_each_channel_continue(i, 0);
3122 30 : }
3123 :
3124 : static void
3125 15 : bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio)
3126 : {
3127 15 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p started.\n", bio);
3128 :
3129 15 : nvme_bdev_for_each_channel(nbdev,
3130 : bdev_nvme_freeze_bdev_channel,
3131 : bio,
3132 : bdev_nvme_freeze_bdev_channel_done);
3133 15 : }
3134 :
3135 : static int
3136 31 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
3137 : {
3138 31 : if (nvme_ctrlr->destruct) {
3139 : /* Don't bother resetting if the controller is in the process of being destructed. */
3140 2 : return -ENXIO;
3141 : }
3142 :
3143 29 : if (nvme_ctrlr->resetting) {
3144 3 : if (!nvme_ctrlr->in_failover) {
3145 3 : NVME_CTRLR_NOTICELOG(nvme_ctrlr,
3146 : "Reset is already in progress. Defer failover until reset completes.\n");
3147 :
3148 : /* Defer failover until reset completes. */
3149 3 : nvme_ctrlr->pending_failover = true;
3150 3 : return -EINPROGRESS;
3151 : } else {
3152 0 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform failover, already in progress.\n");
3153 0 : return -EBUSY;
3154 : }
3155 : }
3156 :
3157 26 : bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
3158 :
3159 26 : if (nvme_ctrlr->reconnect_is_delayed) {
3160 1 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Reconnect is already scheduled.\n");
3161 :
3162 : /* We rely on the next reconnect for the failover. */
3163 1 : return -EALREADY;
3164 : }
3165 :
3166 25 : if (nvme_ctrlr->disabled) {
3167 0 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Controller is disabled.\n");
3168 :
3169 : /* We rely on the enablement for the failover. */
3170 0 : return -EALREADY;
3171 : }
3172 :
3173 25 : nvme_ctrlr->resetting = true;
3174 25 : nvme_ctrlr->in_failover = true;
3175 :
3176 25 : assert(nvme_ctrlr->reset_start_tsc == 0);
3177 25 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
3178 :
3179 25 : return 0;
3180 : }
3181 :
3182 : static int
3183 29 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
3184 : {
3185 : int rc;
3186 :
3187 29 : pthread_mutex_lock(&nvme_ctrlr->mutex);
3188 29 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
3189 29 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
3190 :
3191 29 : if (rc == 0) {
3192 24 : spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
3193 5 : } else if (rc == -EALREADY) {
3194 0 : rc = 0;
3195 : }
3196 :
3197 29 : return rc;
3198 : }
3199 :
3200 : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
3201 : uint64_t num_blocks);
3202 :
3203 : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
3204 : uint64_t num_blocks);
3205 :
3206 : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
3207 : uint64_t src_offset_blocks,
3208 : uint64_t num_blocks);
3209 :
3210 : static void
3211 1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
3212 : bool success)
3213 : {
3214 1 : struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3215 : int ret;
3216 :
3217 1 : if (!success) {
3218 0 : ret = -EINVAL;
3219 0 : goto exit;
3220 : }
3221 :
3222 1 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
3223 0 : ret = -ENXIO;
3224 0 : goto exit;
3225 : }
3226 :
3227 1 : ret = bdev_nvme_readv(bio,
3228 : bdev_io->u.bdev.iovs,
3229 : bdev_io->u.bdev.iovcnt,
3230 : bdev_io->u.bdev.md_buf,
3231 : bdev_io->u.bdev.num_blocks,
3232 : bdev_io->u.bdev.offset_blocks,
3233 : bdev_io->u.bdev.dif_check_flags,
3234 : bdev_io->u.bdev.memory_domain,
3235 : bdev_io->u.bdev.memory_domain_ctx,
3236 : bdev_io->u.bdev.accel_sequence);
3237 :
3238 1 : exit:
3239 1 : if (spdk_unlikely(ret != 0)) {
3240 0 : bdev_nvme_io_complete(bio, ret);
3241 : }
3242 1 : }
3243 :
3244 : static inline void
3245 59 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
3246 : {
3247 59 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3248 59 : struct spdk_bdev *bdev = bdev_io->bdev;
3249 : struct nvme_bdev_io *nbdev_io_to_abort;
3250 59 : int rc = 0;
3251 :
3252 59 : switch (bdev_io->type) {
3253 3 : case SPDK_BDEV_IO_TYPE_READ:
3254 3 : if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
3255 :
3256 2 : rc = bdev_nvme_readv(nbdev_io,
3257 : bdev_io->u.bdev.iovs,
3258 : bdev_io->u.bdev.iovcnt,
3259 : bdev_io->u.bdev.md_buf,
3260 : bdev_io->u.bdev.num_blocks,
3261 : bdev_io->u.bdev.offset_blocks,
3262 : bdev_io->u.bdev.dif_check_flags,
3263 : bdev_io->u.bdev.memory_domain,
3264 : bdev_io->u.bdev.memory_domain_ctx,
3265 : bdev_io->u.bdev.accel_sequence);
3266 : } else {
3267 1 : spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
3268 1 : bdev_io->u.bdev.num_blocks * bdev->blocklen);
3269 1 : rc = 0;
3270 : }
3271 3 : break;
3272 25 : case SPDK_BDEV_IO_TYPE_WRITE:
3273 25 : rc = bdev_nvme_writev(nbdev_io,
3274 : bdev_io->u.bdev.iovs,
3275 : bdev_io->u.bdev.iovcnt,
3276 : bdev_io->u.bdev.md_buf,
3277 : bdev_io->u.bdev.num_blocks,
3278 : bdev_io->u.bdev.offset_blocks,
3279 : bdev_io->u.bdev.dif_check_flags,
3280 : bdev_io->u.bdev.memory_domain,
3281 : bdev_io->u.bdev.memory_domain_ctx,
3282 : bdev_io->u.bdev.accel_sequence,
3283 : bdev_io->u.bdev.nvme_cdw12,
3284 : bdev_io->u.bdev.nvme_cdw13);
3285 25 : break;
3286 1 : case SPDK_BDEV_IO_TYPE_COMPARE:
3287 1 : rc = bdev_nvme_comparev(nbdev_io,
3288 : bdev_io->u.bdev.iovs,
3289 : bdev_io->u.bdev.iovcnt,
3290 : bdev_io->u.bdev.md_buf,
3291 : bdev_io->u.bdev.num_blocks,
3292 : bdev_io->u.bdev.offset_blocks,
3293 : bdev_io->u.bdev.dif_check_flags);
3294 1 : break;
3295 2 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3296 2 : rc = bdev_nvme_comparev_and_writev(nbdev_io,
3297 : bdev_io->u.bdev.iovs,
3298 : bdev_io->u.bdev.iovcnt,
3299 : bdev_io->u.bdev.fused_iovs,
3300 : bdev_io->u.bdev.fused_iovcnt,
3301 : bdev_io->u.bdev.md_buf,
3302 : bdev_io->u.bdev.num_blocks,
3303 : bdev_io->u.bdev.offset_blocks,
3304 : bdev_io->u.bdev.dif_check_flags);
3305 2 : break;
3306 1 : case SPDK_BDEV_IO_TYPE_UNMAP:
3307 1 : rc = bdev_nvme_unmap(nbdev_io,
3308 : bdev_io->u.bdev.offset_blocks,
3309 : bdev_io->u.bdev.num_blocks);
3310 1 : break;
3311 0 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3312 0 : rc = bdev_nvme_write_zeroes(nbdev_io,
3313 : bdev_io->u.bdev.offset_blocks,
3314 : bdev_io->u.bdev.num_blocks);
3315 0 : break;
3316 15 : case SPDK_BDEV_IO_TYPE_RESET:
3317 15 : nbdev_io->io_path = NULL;
3318 15 : bdev_nvme_reset_io(bdev->ctxt, nbdev_io);
3319 15 : return;
3320 :
3321 1 : case SPDK_BDEV_IO_TYPE_FLUSH:
3322 1 : bdev_nvme_io_complete(nbdev_io, 0);
3323 1 : return;
3324 :
3325 0 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3326 0 : rc = bdev_nvme_zone_appendv(nbdev_io,
3327 : bdev_io->u.bdev.iovs,
3328 : bdev_io->u.bdev.iovcnt,
3329 : bdev_io->u.bdev.md_buf,
3330 : bdev_io->u.bdev.num_blocks,
3331 : bdev_io->u.bdev.offset_blocks,
3332 : bdev_io->u.bdev.dif_check_flags);
3333 0 : break;
3334 0 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3335 0 : rc = bdev_nvme_get_zone_info(nbdev_io,
3336 : bdev_io->u.zone_mgmt.zone_id,
3337 : bdev_io->u.zone_mgmt.num_zones,
3338 0 : bdev_io->u.zone_mgmt.buf);
3339 0 : break;
3340 0 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3341 0 : rc = bdev_nvme_zone_management(nbdev_io,
3342 : bdev_io->u.zone_mgmt.zone_id,
3343 : bdev_io->u.zone_mgmt.zone_action);
3344 0 : break;
3345 5 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3346 5 : nbdev_io->io_path = NULL;
3347 5 : bdev_nvme_admin_passthru(nbdev_ch,
3348 : nbdev_io,
3349 : &bdev_io->u.nvme_passthru.cmd,
3350 : bdev_io->u.nvme_passthru.buf,
3351 : bdev_io->u.nvme_passthru.nbytes);
3352 5 : return;
3353 :
3354 0 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3355 0 : rc = bdev_nvme_io_passthru(nbdev_io,
3356 : &bdev_io->u.nvme_passthru.cmd,
3357 : bdev_io->u.nvme_passthru.buf,
3358 : bdev_io->u.nvme_passthru.nbytes);
3359 0 : break;
3360 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3361 0 : rc = bdev_nvme_io_passthru_md(nbdev_io,
3362 : &bdev_io->u.nvme_passthru.cmd,
3363 : bdev_io->u.nvme_passthru.buf,
3364 : bdev_io->u.nvme_passthru.nbytes,
3365 : bdev_io->u.nvme_passthru.md_buf,
3366 : bdev_io->u.nvme_passthru.md_len);
3367 0 : break;
3368 0 : case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
3369 0 : rc = bdev_nvme_iov_passthru_md(nbdev_io,
3370 : &bdev_io->u.nvme_passthru.cmd,
3371 : bdev_io->u.nvme_passthru.iovs,
3372 : bdev_io->u.nvme_passthru.iovcnt,
3373 : bdev_io->u.nvme_passthru.nbytes,
3374 : bdev_io->u.nvme_passthru.md_buf,
3375 : bdev_io->u.nvme_passthru.md_len);
3376 0 : break;
3377 6 : case SPDK_BDEV_IO_TYPE_ABORT:
3378 6 : nbdev_io->io_path = NULL;
3379 6 : nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
3380 6 : bdev_nvme_abort(nbdev_ch,
3381 : nbdev_io,
3382 : nbdev_io_to_abort);
3383 6 : return;
3384 :
3385 0 : case SPDK_BDEV_IO_TYPE_COPY:
3386 0 : rc = bdev_nvme_copy(nbdev_io,
3387 : bdev_io->u.bdev.offset_blocks,
3388 : bdev_io->u.bdev.copy.src_offset_blocks,
3389 : bdev_io->u.bdev.num_blocks);
3390 0 : break;
3391 0 : default:
3392 0 : rc = -EINVAL;
3393 0 : break;
3394 : }
3395 :
3396 32 : if (spdk_unlikely(rc != 0)) {
3397 0 : bdev_nvme_io_complete(nbdev_io, rc);
3398 : }
3399 : }
3400 :
3401 : static void
3402 68 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
3403 : {
3404 68 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
3405 68 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3406 :
3407 68 : if (spdk_likely(nbdev_io->submit_tsc == 0)) {
3408 68 : nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
3409 : } else {
3410 : /* There are cases where submit_tsc != 0, i.e. retry I/O.
3411 : * We need to update submit_tsc here.
3412 : */
3413 0 : nbdev_io->submit_tsc = spdk_get_ticks();
3414 : }
3415 :
3416 68 : spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
3417 68 : nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
3418 68 : if (spdk_unlikely(!nbdev_io->io_path)) {
3419 13 : if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
3420 12 : bdev_nvme_io_complete(nbdev_io, -ENXIO);
3421 12 : return;
3422 : }
3423 :
3424 : /* Admin commands do not use the optimal I/O path.
3425 : * Simply fall through even if it is not found.
3426 : */
3427 : }
3428 :
3429 56 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
3430 : }
3431 :
3432 : static bool
3433 0 : bdev_nvme_is_supported_csi(enum spdk_nvme_csi csi)
3434 : {
3435 0 : switch (csi) {
3436 0 : case SPDK_NVME_CSI_NVM:
3437 0 : return true;
3438 0 : case SPDK_NVME_CSI_ZNS:
3439 0 : return true;
3440 0 : default:
3441 0 : return false;
3442 : }
3443 : }
3444 :
3445 : static bool
3446 0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
3447 : {
3448 0 : struct nvme_bdev *nbdev = ctx;
3449 : struct nvme_ns *nvme_ns;
3450 : struct spdk_nvme_ns *ns;
3451 : struct spdk_nvme_ctrlr *ctrlr;
3452 : const struct spdk_nvme_ctrlr_data *cdata;
3453 :
3454 0 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
3455 0 : assert(nvme_ns != NULL);
3456 0 : ns = nvme_ns->ns;
3457 0 : if (ns == NULL) {
3458 0 : return false;
3459 : }
3460 :
3461 0 : if (!bdev_nvme_is_supported_csi(spdk_nvme_ns_get_csi(ns))) {
3462 0 : switch (io_type) {
3463 0 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3464 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3465 0 : return true;
3466 :
3467 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3468 0 : return spdk_nvme_ns_get_md_size(ns) ? true : false;
3469 :
3470 0 : default:
3471 0 : return false;
3472 : }
3473 : }
3474 :
3475 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3476 :
3477 0 : switch (io_type) {
3478 0 : case SPDK_BDEV_IO_TYPE_READ:
3479 : case SPDK_BDEV_IO_TYPE_WRITE:
3480 : case SPDK_BDEV_IO_TYPE_RESET:
3481 : case SPDK_BDEV_IO_TYPE_FLUSH:
3482 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3483 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3484 : case SPDK_BDEV_IO_TYPE_ABORT:
3485 0 : return true;
3486 :
3487 0 : case SPDK_BDEV_IO_TYPE_COMPARE:
3488 0 : return spdk_nvme_ns_supports_compare(ns);
3489 :
3490 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3491 0 : return spdk_nvme_ns_get_md_size(ns) ? true : false;
3492 :
3493 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
3494 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3495 0 : return cdata->oncs.dsm;
3496 :
3497 0 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3498 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3499 0 : return cdata->oncs.write_zeroes;
3500 :
3501 0 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3502 0 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
3503 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
3504 0 : return true;
3505 : }
3506 0 : return false;
3507 :
3508 0 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3509 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3510 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
3511 :
3512 0 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3513 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
3514 0 : spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
3515 :
3516 0 : case SPDK_BDEV_IO_TYPE_COPY:
3517 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3518 0 : return cdata->oncs.copy;
3519 :
3520 0 : default:
3521 0 : return false;
3522 : }
3523 : }
3524 :
3525 : static int
3526 59 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
3527 : {
3528 : struct nvme_qpair *nvme_qpair;
3529 : struct spdk_io_channel *pg_ch;
3530 : int rc;
3531 :
3532 59 : nvme_qpair = calloc(1, sizeof(*nvme_qpair));
3533 59 : if (!nvme_qpair) {
3534 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to alloc nvme_qpair.\n");
3535 0 : return -1;
3536 : }
3537 :
3538 59 : TAILQ_INIT(&nvme_qpair->io_path_list);
3539 :
3540 59 : nvme_qpair->ctrlr = nvme_ctrlr;
3541 59 : nvme_qpair->ctrlr_ch = ctrlr_ch;
3542 :
3543 59 : pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
3544 59 : if (!pg_ch) {
3545 0 : free(nvme_qpair);
3546 0 : return -1;
3547 : }
3548 :
3549 59 : nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
3550 :
3551 : #ifdef SPDK_CONFIG_VTUNE
3552 : nvme_qpair->group->collect_spin_stat = true;
3553 : #else
3554 59 : nvme_qpair->group->collect_spin_stat = false;
3555 : #endif
3556 :
3557 59 : if (!nvme_ctrlr->disabled) {
3558 : /* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will
3559 : * be created when it's enabled.
3560 : */
3561 59 : rc = bdev_nvme_create_qpair(nvme_qpair);
3562 59 : if (rc != 0) {
3563 : /* nvme_ctrlr can't create IO qpair if connection is down.
3564 : * If reconnect_delay_sec is non-zero, creating IO qpair is retried
3565 : * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
3566 : * submitted IO will be queued until IO qpair is successfully created.
3567 : *
3568 : * Hence, if both are satisfied, ignore the failure.
3569 : */
3570 0 : if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
3571 0 : spdk_put_io_channel(pg_ch);
3572 0 : free(nvme_qpair);
3573 0 : return rc;
3574 : }
3575 : }
3576 : }
3577 :
3578 59 : TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3579 :
3580 59 : ctrlr_ch->qpair = nvme_qpair;
3581 :
3582 59 : pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
3583 59 : nvme_qpair->ctrlr->ref++;
3584 59 : pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);
3585 :
3586 59 : return 0;
3587 : }
3588 :
3589 : static int
3590 59 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3591 : {
3592 59 : struct nvme_ctrlr *nvme_ctrlr = io_device;
3593 59 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3594 :
3595 59 : TAILQ_INIT(&ctrlr_ch->pending_resets);
3596 :
3597 59 : return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
3598 : }
3599 :
3600 : static void
3601 59 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
3602 : {
3603 : struct nvme_io_path *io_path, *next;
3604 :
3605 59 : assert(nvme_qpair->group != NULL);
3606 :
3607 96 : TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
3608 37 : TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
3609 37 : nvme_io_path_free(io_path);
3610 : }
3611 :
3612 59 : TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3613 :
3614 59 : spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
3615 :
3616 59 : nvme_ctrlr_release(nvme_qpair->ctrlr);
3617 :
3618 59 : free(nvme_qpair);
3619 59 : }
3620 :
3621 : static void
3622 59 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3623 : {
3624 59 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3625 : struct nvme_qpair *nvme_qpair;
3626 :
3627 59 : nvme_qpair = ctrlr_ch->qpair;
3628 59 : assert(nvme_qpair != NULL);
3629 :
3630 59 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
3631 :
3632 59 : if (nvme_qpair->qpair != NULL) {
3633 45 : if (ctrlr_ch->reset_iter == NULL) {
3634 45 : spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
3635 : } else {
3636 : /* Skip current ctrlr_channel in a full reset sequence because
3637 : * it is being deleted now. The qpair is already being disconnected.
3638 : * We do not have to restart disconnecting it.
3639 : */
3640 0 : nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
3641 : }
3642 :
3643 : /* We cannot release a reference to the poll group now.
3644 : * The qpair may be disconnected asynchronously later.
3645 : * We need to poll it until it is actually disconnected.
3646 : * Just detach the qpair from the deleting ctrlr_channel.
3647 : */
3648 45 : nvme_qpair->ctrlr_ch = NULL;
3649 : } else {
3650 14 : assert(ctrlr_ch->reset_iter == NULL);
3651 :
3652 14 : nvme_qpair_delete(nvme_qpair);
3653 : }
3654 59 : }
3655 :
3656 : static inline struct spdk_io_channel *
3657 0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
3658 : {
3659 0 : if (spdk_unlikely(!group->accel_channel)) {
3660 0 : group->accel_channel = spdk_accel_get_io_channel();
3661 0 : if (!group->accel_channel) {
3662 0 : SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
3663 : group);
3664 0 : return NULL;
3665 : }
3666 : }
3667 :
3668 0 : return group->accel_channel;
3669 : }
3670 :
3671 : static void
3672 0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
3673 : {
3674 0 : spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
3675 0 : }
3676 :
/* Accel function table hook: forwards to spdk_accel_sequence_abort(). */
static void
bdev_nvme_abort_sequence(void *seq)
{
	struct spdk_accel_sequence *accel_seq = seq;

	spdk_accel_sequence_abort(accel_seq);
}
3682 :
/* Accel function table hook: forwards to spdk_accel_sequence_reverse(). */
static void
bdev_nvme_reverse_sequence(void *seq)
{
	struct spdk_accel_sequence *accel_seq = seq;

	spdk_accel_sequence_reverse(accel_seq);
}
3688 :
3689 : static int
3690 0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
3691 : struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
3692 : spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
3693 : {
3694 : struct spdk_io_channel *ch;
3695 0 : struct nvme_poll_group *group = ctx;
3696 :
3697 0 : ch = bdev_nvme_get_accel_channel(group);
3698 0 : if (spdk_unlikely(ch == NULL)) {
3699 0 : return -ENOMEM;
3700 : }
3701 :
3702 0 : return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
3703 : domain, domain_ctx, seed, cb_fn, cb_arg);
3704 : }
3705 :
3706 : static int
3707 0 : bdev_nvme_append_copy(void *ctx, void **seq, struct iovec *dst_iovs, uint32_t dst_iovcnt,
3708 : struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
3709 : struct iovec *src_iovs, uint32_t src_iovcnt,
3710 : struct spdk_memory_domain *src_domain, void *src_domain_ctx,
3711 : spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
3712 : {
3713 : struct spdk_io_channel *ch;
3714 0 : struct nvme_poll_group *group = ctx;
3715 :
3716 0 : ch = bdev_nvme_get_accel_channel(group);
3717 0 : if (spdk_unlikely(ch == NULL)) {
3718 0 : return -ENOMEM;
3719 : }
3720 :
3721 0 : return spdk_accel_append_copy((struct spdk_accel_sequence **)seq, ch,
3722 : dst_iovs, dst_iovcnt, dst_domain, dst_domain_ctx,
3723 : src_iovs, src_iovcnt, src_domain, src_domain_ctx,
3724 : cb_fn, cb_arg);
3725 : }
3726 :
3727 : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
3728 : .table_size = sizeof(struct spdk_nvme_accel_fn_table),
3729 : .append_crc32c = bdev_nvme_append_crc32c,
3730 : .append_copy = bdev_nvme_append_copy,
3731 : .finish_sequence = bdev_nvme_finish_sequence,
3732 : .reverse_sequence = bdev_nvme_reverse_sequence,
3733 : .abort_sequence = bdev_nvme_abort_sequence,
3734 : };
3735 :
3736 : static int
3737 44 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
3738 : {
3739 44 : struct nvme_poll_group *group = ctx_buf;
3740 :
3741 44 : TAILQ_INIT(&group->qpair_list);
3742 :
3743 44 : group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
3744 44 : if (group->group == NULL) {
3745 0 : return -1;
3746 : }
3747 :
3748 44 : group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
3749 :
3750 44 : if (group->poller == NULL) {
3751 0 : spdk_nvme_poll_group_destroy(group->group);
3752 0 : return -1;
3753 : }
3754 :
3755 44 : return 0;
3756 : }
3757 :
3758 : static void
3759 44 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
3760 : {
3761 44 : struct nvme_poll_group *group = ctx_buf;
3762 :
3763 44 : assert(TAILQ_EMPTY(&group->qpair_list));
3764 :
3765 44 : if (group->accel_channel) {
3766 0 : spdk_put_io_channel(group->accel_channel);
3767 : }
3768 :
3769 44 : spdk_poller_unregister(&group->poller);
3770 44 : if (spdk_nvme_poll_group_destroy(group->group)) {
3771 0 : SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
3772 0 : assert(false);
3773 : }
3774 44 : }
3775 :
/* Return an I/O channel for the nvme_bdev passed as ctx; the bdev itself is
 * used as the io_device key.
 */
static struct spdk_io_channel *
bdev_nvme_get_io_channel(void *ctx)
{
	struct nvme_bdev *nbdev = ctx;
	struct spdk_io_channel *ch;

	ch = spdk_get_io_channel(nbdev);

	return ch;
}
3783 :
3784 : static void *
3785 0 : bdev_nvme_get_module_ctx(void *ctx)
3786 : {
3787 0 : struct nvme_bdev *nvme_bdev = ctx;
3788 : struct nvme_ns *nvme_ns;
3789 :
3790 0 : if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
3791 0 : return NULL;
3792 : }
3793 :
3794 0 : nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
3795 0 : if (!nvme_ns) {
3796 0 : return NULL;
3797 : }
3798 :
3799 0 : return nvme_ns->ns;
3800 : }
3801 :
3802 : static const char *
3803 0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
3804 : {
3805 0 : switch (ana_state) {
3806 0 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
3807 0 : return "optimized";
3808 0 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
3809 0 : return "non_optimized";
3810 0 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3811 0 : return "inaccessible";
3812 0 : case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3813 0 : return "persistent_loss";
3814 0 : case SPDK_NVME_ANA_CHANGE_STATE:
3815 0 : return "change";
3816 0 : default:
3817 0 : return NULL;
3818 : }
3819 : }
3820 :
3821 : static int
3822 8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
3823 : {
3824 8 : struct spdk_memory_domain **_domains = NULL;
3825 8 : struct nvme_bdev *nbdev = ctx;
3826 : struct nvme_ns *nvme_ns;
3827 8 : int i = 0, _array_size = array_size;
3828 8 : int rc = 0;
3829 :
3830 22 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
3831 14 : if (domains && array_size >= i) {
3832 11 : _domains = &domains[i];
3833 : } else {
3834 3 : _domains = NULL;
3835 : }
3836 14 : rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
3837 14 : if (rc > 0) {
3838 13 : i += rc;
3839 13 : if (_array_size >= rc) {
3840 9 : _array_size -= rc;
3841 : } else {
3842 4 : _array_size = 0;
3843 : }
3844 1 : } else if (rc < 0) {
3845 0 : return rc;
3846 : }
3847 : }
3848 :
3849 8 : return i;
3850 : }
3851 :
3852 : static const char *
3853 0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
3854 : {
3855 0 : if (nvme_ctrlr->destruct) {
3856 0 : return "deleting";
3857 0 : } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
3858 0 : return "failed";
3859 0 : } else if (nvme_ctrlr->resetting) {
3860 0 : return "resetting";
3861 0 : } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
3862 0 : return "reconnect_is_delayed";
3863 0 : } else if (nvme_ctrlr->disabled) {
3864 0 : return "disabled";
3865 : } else {
3866 0 : return "enabled";
3867 : }
3868 : }
3869 :
3870 : void
3871 0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
3872 0 : {
3873 : struct spdk_nvme_transport_id *trid;
3874 : const struct spdk_nvme_ctrlr_opts *opts;
3875 : const struct spdk_nvme_ctrlr_data *cdata;
3876 : struct nvme_path_id *path_id;
3877 : int32_t numa_id;
3878 :
3879 0 : spdk_json_write_object_begin(w);
3880 :
3881 0 : spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
3882 :
3883 : #ifdef SPDK_CONFIG_NVME_CUSE
3884 0 : size_t cuse_name_size = 128;
3885 0 : char cuse_name[cuse_name_size];
3886 :
3887 0 : int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
3888 0 : if (rc == 0) {
3889 0 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
3890 : }
3891 : #endif
3892 0 : trid = &nvme_ctrlr->active_path_id->trid;
3893 0 : spdk_json_write_named_object_begin(w, "trid");
3894 0 : nvme_bdev_dump_trid_json(trid, w);
3895 0 : spdk_json_write_object_end(w);
3896 :
3897 0 : path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
3898 0 : if (path_id != NULL) {
3899 0 : spdk_json_write_named_array_begin(w, "alternate_trids");
3900 : do {
3901 0 : trid = &path_id->trid;
3902 0 : spdk_json_write_object_begin(w);
3903 0 : nvme_bdev_dump_trid_json(trid, w);
3904 0 : spdk_json_write_object_end(w);
3905 :
3906 0 : path_id = TAILQ_NEXT(path_id, link);
3907 0 : } while (path_id != NULL);
3908 0 : spdk_json_write_array_end(w);
3909 : }
3910 :
3911 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
3912 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
3913 :
3914 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
3915 0 : spdk_json_write_named_object_begin(w, "host");
3916 0 : spdk_json_write_named_string(w, "nqn", opts->hostnqn);
3917 0 : spdk_json_write_named_string(w, "addr", opts->src_addr);
3918 0 : spdk_json_write_named_string(w, "svcid", opts->src_svcid);
3919 0 : spdk_json_write_object_end(w);
3920 :
3921 0 : numa_id = spdk_nvme_ctrlr_get_numa_id(nvme_ctrlr->ctrlr);
3922 0 : if (numa_id != SPDK_ENV_NUMA_ID_ANY) {
3923 0 : spdk_json_write_named_uint32(w, "numa_id", numa_id);
3924 : }
3925 0 : spdk_json_write_object_end(w);
3926 0 : }
3927 :
3928 : static void
3929 0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
3930 : struct nvme_ns *nvme_ns)
3931 0 : {
3932 : struct spdk_nvme_ns *ns;
3933 : struct spdk_nvme_ctrlr *ctrlr;
3934 : const struct spdk_nvme_ctrlr_data *cdata;
3935 : const struct spdk_nvme_transport_id *trid;
3936 : union spdk_nvme_vs_register vs;
3937 : const struct spdk_nvme_ns_data *nsdata;
3938 0 : char buf[128];
3939 :
3940 0 : ns = nvme_ns->ns;
3941 0 : if (ns == NULL) {
3942 0 : return;
3943 : }
3944 :
3945 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3946 :
3947 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3948 0 : trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
3949 0 : vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
3950 :
3951 0 : spdk_json_write_object_begin(w);
3952 :
3953 0 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
3954 0 : spdk_json_write_named_string(w, "pci_address", trid->traddr);
3955 : }
3956 :
3957 0 : spdk_json_write_named_object_begin(w, "trid");
3958 :
3959 0 : nvme_bdev_dump_trid_json(trid, w);
3960 :
3961 0 : spdk_json_write_object_end(w);
3962 :
3963 : #ifdef SPDK_CONFIG_NVME_CUSE
3964 0 : size_t cuse_name_size = 128;
3965 0 : char cuse_name[cuse_name_size];
3966 :
3967 0 : int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
3968 : cuse_name, &cuse_name_size);
3969 0 : if (rc == 0) {
3970 0 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
3971 : }
3972 : #endif
3973 :
3974 0 : spdk_json_write_named_object_begin(w, "ctrlr_data");
3975 :
3976 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
3977 :
3978 0 : spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
3979 :
3980 0 : snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
3981 0 : spdk_str_trim(buf);
3982 0 : spdk_json_write_named_string(w, "model_number", buf);
3983 :
3984 0 : snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
3985 0 : spdk_str_trim(buf);
3986 0 : spdk_json_write_named_string(w, "serial_number", buf);
3987 :
3988 0 : snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
3989 0 : spdk_str_trim(buf);
3990 0 : spdk_json_write_named_string(w, "firmware_revision", buf);
3991 :
3992 0 : if (cdata->subnqn[0] != '\0') {
3993 0 : spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
3994 : }
3995 :
3996 0 : spdk_json_write_named_object_begin(w, "oacs");
3997 :
3998 0 : spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
3999 0 : spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
4000 0 : spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
4001 0 : spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
4002 :
4003 0 : spdk_json_write_object_end(w);
4004 :
4005 0 : spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
4006 0 : spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
4007 :
4008 0 : spdk_json_write_object_end(w);
4009 :
4010 0 : spdk_json_write_named_object_begin(w, "vs");
4011 :
4012 0 : spdk_json_write_name(w, "nvme_version");
4013 0 : if (vs.bits.ter) {
4014 0 : spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
4015 : } else {
4016 0 : spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
4017 : }
4018 :
4019 0 : spdk_json_write_object_end(w);
4020 :
4021 0 : nsdata = spdk_nvme_ns_get_data(ns);
4022 :
4023 0 : spdk_json_write_named_object_begin(w, "ns_data");
4024 :
4025 0 : spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
4026 :
4027 0 : if (cdata->cmic.ana_reporting) {
4028 0 : spdk_json_write_named_string(w, "ana_state",
4029 : _nvme_ana_state_str(nvme_ns->ana_state));
4030 : }
4031 :
4032 0 : spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
4033 :
4034 0 : spdk_json_write_object_end(w);
4035 :
4036 0 : if (cdata->oacs.security) {
4037 0 : spdk_json_write_named_object_begin(w, "security");
4038 :
4039 0 : spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
4040 :
4041 0 : spdk_json_write_object_end(w);
4042 : }
4043 :
4044 0 : spdk_json_write_object_end(w);
4045 : }
4046 :
4047 : static const char *
4048 0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
4049 : {
4050 0 : switch (nbdev->mp_policy) {
4051 0 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
4052 0 : return "active_passive";
4053 0 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
4054 0 : return "active_active";
4055 0 : default:
4056 0 : assert(false);
4057 : return "invalid";
4058 : }
4059 : }
4060 :
4061 : static const char *
4062 0 : nvme_bdev_get_mp_selector_str(struct nvme_bdev *nbdev)
4063 : {
4064 0 : switch (nbdev->mp_selector) {
4065 0 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
4066 0 : return "round_robin";
4067 0 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
4068 0 : return "queue_depth";
4069 0 : default:
4070 0 : assert(false);
4071 : return "invalid";
4072 : }
4073 : }
4074 :
4075 : static int
4076 0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
4077 : {
4078 0 : struct nvme_bdev *nvme_bdev = ctx;
4079 : struct nvme_ns *nvme_ns;
4080 :
4081 0 : pthread_mutex_lock(&nvme_bdev->mutex);
4082 0 : spdk_json_write_named_array_begin(w, "nvme");
4083 0 : TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
4084 0 : nvme_namespace_info_json(w, nvme_ns);
4085 : }
4086 0 : spdk_json_write_array_end(w);
4087 0 : spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
4088 0 : if (nvme_bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
4089 0 : spdk_json_write_named_string(w, "selector", nvme_bdev_get_mp_selector_str(nvme_bdev));
4090 0 : if (nvme_bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
4091 0 : spdk_json_write_named_uint32(w, "rr_min_io", nvme_bdev->rr_min_io);
4092 : }
4093 : }
4094 0 : pthread_mutex_unlock(&nvme_bdev->mutex);
4095 :
4096 0 : return 0;
4097 : }
4098 :
/*
 * write_config_json callback. Intentionally empty: this module emits no
 * per-bdev configuration from here.
 */
static void
bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* Nothing to write for an individual bdev. */
}
4104 :
4105 : static uint64_t
4106 0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
4107 : {
4108 0 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
4109 : struct nvme_io_path *io_path;
4110 : struct nvme_poll_group *group;
4111 0 : uint64_t spin_time = 0;
4112 :
4113 0 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
4114 0 : group = io_path->qpair->group;
4115 :
4116 0 : if (!group || !group->collect_spin_stat) {
4117 0 : continue;
4118 : }
4119 :
4120 0 : if (group->end_ticks != 0) {
4121 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
4122 0 : group->end_ticks = 0;
4123 : }
4124 :
4125 0 : spin_time += group->spin_ticks;
4126 0 : group->start_ticks = 0;
4127 0 : group->spin_ticks = 0;
4128 : }
4129 :
4130 0 : return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
4131 : }
4132 :
4133 : static void
4134 0 : bdev_nvme_reset_device_stat(void *ctx)
4135 : {
4136 0 : struct nvme_bdev *nbdev = ctx;
4137 :
4138 0 : if (nbdev->err_stat != NULL) {
4139 0 : memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
4140 : }
4141 0 : }
4142 :
/*
 * JSON keys should be lowercase, underscore-delimited strings.
 *
 * Build such a key from an NVMe status string: " - ", "-" and " " are each
 * replaced by "_" (in that order) and the result is lowercased in place.
 * dst is written with a size limit of 256 bytes, so the caller must supply
 * a buffer at least that large.
 */
static void
bdev_nvme_format_nvme_status(char *dst, const char *src)
{
	enum { STATUS_BUF_LEN = 256 };
	char scratch[STATUS_BUF_LEN];

	/* Ping-pong between dst and scratch, one replacement rule per pass. */
	spdk_strcpy_replace(dst, STATUS_BUF_LEN, src, " - ", "_");
	spdk_strcpy_replace(scratch, STATUS_BUF_LEN, dst, "-", "_");
	spdk_strcpy_replace(dst, STATUS_BUF_LEN, scratch, " ", "_");

	spdk_strlwr(dst);
}
4154 :
4155 : static void
4156 0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
4157 : {
4158 0 : struct nvme_bdev *nbdev = ctx;
4159 0 : struct spdk_nvme_status status = {};
4160 : uint16_t sct, sc;
4161 0 : char status_json[256];
4162 : const char *status_str;
4163 :
4164 0 : if (nbdev->err_stat == NULL) {
4165 0 : return;
4166 : }
4167 :
4168 0 : spdk_json_write_named_object_begin(w, "nvme_error");
4169 :
4170 0 : spdk_json_write_named_object_begin(w, "status_type");
4171 0 : for (sct = 0; sct < 8; sct++) {
4172 0 : if (nbdev->err_stat->status_type[sct] == 0) {
4173 0 : continue;
4174 : }
4175 0 : status.sct = sct;
4176 :
4177 0 : status_str = spdk_nvme_cpl_get_status_type_string(&status);
4178 0 : assert(status_str != NULL);
4179 0 : bdev_nvme_format_nvme_status(status_json, status_str);
4180 :
4181 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
4182 : }
4183 0 : spdk_json_write_object_end(w);
4184 :
4185 0 : spdk_json_write_named_object_begin(w, "status_code");
4186 0 : for (sct = 0; sct < 4; sct++) {
4187 0 : status.sct = sct;
4188 0 : for (sc = 0; sc < 256; sc++) {
4189 0 : if (nbdev->err_stat->status[sct][sc] == 0) {
4190 0 : continue;
4191 : }
4192 0 : status.sc = sc;
4193 :
4194 0 : status_str = spdk_nvme_cpl_get_status_string(&status);
4195 0 : assert(status_str != NULL);
4196 0 : bdev_nvme_format_nvme_status(status_json, status_str);
4197 :
4198 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
4199 : }
4200 : }
4201 0 : spdk_json_write_object_end(w);
4202 :
4203 0 : spdk_json_write_object_end(w);
4204 : }
4205 :
4206 : static bool
4207 0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
4208 : {
4209 0 : struct nvme_bdev *nbdev = ctx;
4210 : struct spdk_nvme_ctrlr *ctrlr;
4211 :
4212 0 : if (!g_opts.allow_accel_sequence) {
4213 0 : return false;
4214 : }
4215 :
4216 0 : switch (type) {
4217 0 : case SPDK_BDEV_IO_TYPE_WRITE:
4218 : case SPDK_BDEV_IO_TYPE_READ:
4219 0 : break;
4220 0 : default:
4221 0 : return false;
4222 : }
4223 :
4224 0 : ctrlr = bdev_nvme_get_ctrlr(&nbdev->disk);
4225 0 : assert(ctrlr != NULL);
4226 :
4227 0 : return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
4228 : }
4229 :
4230 : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
4231 : .destruct = bdev_nvme_destruct,
4232 : .submit_request = bdev_nvme_submit_request,
4233 : .io_type_supported = bdev_nvme_io_type_supported,
4234 : .get_io_channel = bdev_nvme_get_io_channel,
4235 : .dump_info_json = bdev_nvme_dump_info_json,
4236 : .write_config_json = bdev_nvme_write_config_json,
4237 : .get_spin_time = bdev_nvme_get_spin_time,
4238 : .get_module_ctx = bdev_nvme_get_module_ctx,
4239 : .get_memory_domains = bdev_nvme_get_memory_domains,
4240 : .accel_sequence_supported = bdev_nvme_accel_sequence_supported,
4241 : .reset_device_stat = bdev_nvme_reset_device_stat,
4242 : .dump_device_stat_json = bdev_nvme_dump_device_stat_json,
4243 : };
4244 :
/*
 * Callback invoked by bdev_nvme_parse_ana_log_page() for each ANA group
 * descriptor. A non-zero return value stops the iteration and is passed
 * back to the caller of bdev_nvme_parse_ana_log_page().
 */
typedef int (*bdev_nvme_parse_ana_log_page_cb)(const struct spdk_nvme_ana_group_descriptor *desc,
		void *cb_arg);
4247 :
4248 : static int
4249 41 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
4250 : bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
4251 : {
4252 : struct spdk_nvme_ana_group_descriptor *copied_desc;
4253 : uint8_t *orig_desc;
4254 : uint32_t i, desc_size, copy_len;
4255 41 : int rc = 0;
4256 :
4257 41 : if (nvme_ctrlr->ana_log_page == NULL) {
4258 0 : return -EINVAL;
4259 : }
4260 :
4261 41 : copied_desc = nvme_ctrlr->copied_ana_desc;
4262 :
4263 41 : orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
4264 41 : copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
4265 :
4266 71 : for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
4267 66 : memcpy(copied_desc, orig_desc, copy_len);
4268 :
4269 66 : rc = cb_fn(copied_desc, cb_arg);
4270 66 : if (rc != 0) {
4271 36 : break;
4272 : }
4273 :
4274 30 : desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
4275 30 : copied_desc->num_of_nsid * sizeof(uint32_t);
4276 30 : orig_desc += desc_size;
4277 30 : copy_len -= desc_size;
4278 : }
4279 :
4280 41 : return rc;
4281 : }
4282 :
4283 : static int
4284 5 : nvme_ns_ana_transition_timedout(void *ctx)
4285 : {
4286 5 : struct nvme_ns *nvme_ns = ctx;
4287 :
4288 5 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4289 5 : nvme_ns->ana_transition_timedout = true;
4290 :
4291 5 : return SPDK_POLLER_BUSY;
4292 : }
4293 :
4294 : static void
4295 45 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
4296 : const struct spdk_nvme_ana_group_descriptor *desc)
4297 : {
4298 : const struct spdk_nvme_ctrlr_data *cdata;
4299 :
4300 45 : nvme_ns->ana_group_id = desc->ana_group_id;
4301 45 : nvme_ns->ana_state = desc->ana_state;
4302 45 : nvme_ns->ana_state_updating = false;
4303 :
4304 45 : switch (nvme_ns->ana_state) {
4305 38 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
4306 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
4307 38 : nvme_ns->ana_transition_timedout = false;
4308 38 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4309 38 : break;
4310 :
4311 6 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
4312 : case SPDK_NVME_ANA_CHANGE_STATE:
4313 6 : if (nvme_ns->anatt_timer != NULL) {
4314 1 : break;
4315 : }
4316 :
4317 5 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
4318 5 : nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
4319 : nvme_ns,
4320 : cdata->anatt * SPDK_SEC_TO_USEC);
4321 5 : break;
4322 1 : default:
4323 1 : break;
4324 : }
4325 45 : }
4326 :
4327 : static int
4328 59 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
4329 : {
4330 59 : struct nvme_ns *nvme_ns = cb_arg;
4331 : uint32_t i;
4332 :
4333 59 : assert(nvme_ns->ns != NULL);
4334 :
4335 81 : for (i = 0; i < desc->num_of_nsid; i++) {
4336 58 : if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
4337 22 : continue;
4338 : }
4339 :
4340 36 : _nvme_ns_set_ana_state(nvme_ns, desc);
4341 36 : return 1;
4342 : }
4343 :
4344 23 : return 0;
4345 : }
4346 :
4347 : static int
4348 5 : nvme_generate_uuid(const char *sn, uint32_t nsid, struct spdk_uuid *uuid)
4349 : {
4350 5 : int rc = 0;
4351 5 : struct spdk_uuid new_uuid, namespace_uuid;
4352 5 : char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
4353 : /* This namespace UUID was generated using uuid_generate() method. */
4354 5 : const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
4355 : int size;
4356 :
4357 5 : assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
4358 :
4359 5 : spdk_uuid_set_null(&new_uuid);
4360 5 : spdk_uuid_set_null(&namespace_uuid);
4361 :
4362 5 : size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
4363 5 : if (size <= 0 || (unsigned long)size >= sizeof(merged_str)) {
4364 0 : return -EINVAL;
4365 : }
4366 :
4367 5 : spdk_uuid_parse(&namespace_uuid, namespace_str);
4368 :
4369 5 : rc = spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
4370 5 : if (rc == 0) {
4371 5 : memcpy(uuid, &new_uuid, sizeof(struct spdk_uuid));
4372 : }
4373 :
4374 5 : return rc;
4375 : }
4376 :
4377 : static int
4378 38 : nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
4379 : struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
4380 : struct spdk_bdev_nvme_ctrlr_opts *bdev_opts, void *ctx)
4381 : {
4382 : const struct spdk_uuid *uuid;
4383 : const uint8_t *nguid;
4384 : const struct spdk_nvme_ctrlr_data *cdata;
4385 : const struct spdk_nvme_ns_data *nsdata;
4386 : const struct spdk_nvme_ctrlr_opts *opts;
4387 : enum spdk_nvme_csi csi;
4388 : uint32_t atomic_bs, phys_bs, bs;
4389 38 : char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
4390 : int rc;
4391 :
4392 38 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4393 38 : csi = spdk_nvme_ns_get_csi(ns);
4394 38 : opts = spdk_nvme_ctrlr_get_opts(ctrlr);
4395 :
4396 38 : switch (csi) {
4397 38 : case SPDK_NVME_CSI_NVM:
4398 38 : disk->product_name = "NVMe disk";
4399 38 : break;
4400 0 : case SPDK_NVME_CSI_ZNS:
4401 0 : disk->product_name = "NVMe ZNS disk";
4402 0 : disk->zoned = true;
4403 0 : disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
4404 0 : disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
4405 0 : spdk_nvme_ns_get_extended_sector_size(ns);
4406 0 : disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
4407 0 : disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
4408 0 : break;
4409 0 : default:
4410 0 : if (bdev_opts->allow_unrecognized_csi) {
4411 0 : disk->product_name = "NVMe Passthrough disk";
4412 0 : break;
4413 : }
4414 0 : SPDK_ERRLOG("unsupported CSI: %u\n", csi);
4415 0 : return -ENOTSUP;
4416 : }
4417 :
4418 38 : nguid = spdk_nvme_ns_get_nguid(ns);
4419 38 : if (!nguid) {
4420 38 : uuid = spdk_nvme_ns_get_uuid(ns);
4421 38 : if (uuid) {
4422 12 : disk->uuid = *uuid;
4423 26 : } else if (g_opts.generate_uuids) {
4424 0 : spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
4425 0 : rc = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns), &disk->uuid);
4426 0 : if (rc < 0) {
4427 0 : SPDK_ERRLOG("UUID generation failed (%s)\n", spdk_strerror(-rc));
4428 0 : return rc;
4429 : }
4430 : }
4431 : } else {
4432 0 : memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
4433 : }
4434 :
4435 38 : disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
4436 38 : if (!disk->name) {
4437 0 : return -ENOMEM;
4438 : }
4439 :
4440 38 : disk->write_cache = 0;
4441 38 : if (cdata->vwc.present) {
4442 : /* Enable if the Volatile Write Cache exists */
4443 0 : disk->write_cache = 1;
4444 : }
4445 38 : if (cdata->oncs.write_zeroes) {
4446 0 : disk->max_write_zeroes = UINT16_MAX + 1;
4447 : }
4448 38 : disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
4449 38 : disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
4450 38 : disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
4451 38 : disk->ctratt.raw = cdata->ctratt.raw;
4452 : /* NVMe driver will split one request into multiple requests
4453 : * based on MDTS and stripe boundary, the bdev layer will use
4454 : * max_segment_size and max_num_segments to split one big IO
4455 : * into multiple requests, then small request can't run out
4456 : * of NVMe internal requests data structure.
4457 : */
4458 38 : if (opts && opts->io_queue_requests) {
4459 0 : disk->max_num_segments = opts->io_queue_requests / 2;
4460 : }
4461 38 : if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
4462 : /* The nvme driver will try to split I/O that have too many
4463 : * SGEs, but it doesn't work if that last SGE doesn't end on
4464 : * an aggregate total that is block aligned. The bdev layer has
4465 : * a more robust splitting framework, so use that instead for
4466 : * this case. (See issue #3269.)
4467 : */
4468 0 : uint16_t max_sges = spdk_nvme_ctrlr_get_max_sges(ctrlr);
4469 :
4470 0 : if (disk->max_num_segments == 0) {
4471 0 : disk->max_num_segments = max_sges;
4472 : } else {
4473 0 : disk->max_num_segments = spdk_min(disk->max_num_segments, max_sges);
4474 : }
4475 : }
4476 38 : disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
4477 :
4478 38 : nsdata = spdk_nvme_ns_get_data(ns);
4479 38 : bs = spdk_nvme_ns_get_sector_size(ns);
4480 38 : atomic_bs = bs;
4481 38 : phys_bs = bs;
4482 38 : if (nsdata->nabo == 0) {
4483 38 : if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
4484 0 : atomic_bs = bs * (1 + nsdata->nawupf);
4485 : } else {
4486 38 : atomic_bs = bs * (1 + cdata->awupf);
4487 : }
4488 : }
4489 38 : if (nsdata->nsfeat.optperf) {
4490 0 : phys_bs = bs * (1 + nsdata->npwg);
4491 : }
4492 38 : disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
4493 :
4494 38 : disk->md_len = spdk_nvme_ns_get_md_size(ns);
4495 38 : if (disk->md_len != 0) {
4496 0 : disk->md_interleave = nsdata->flbas.extended;
4497 0 : disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
4498 0 : if (disk->dif_type != SPDK_DIF_DISABLE) {
4499 0 : disk->dif_is_head_of_md = nsdata->dps.md_start;
4500 0 : disk->dif_check_flags = bdev_opts->prchk_flags;
4501 0 : disk->dif_pi_format = (enum spdk_dif_pi_format)spdk_nvme_ns_get_pi_format(ns);
4502 : }
4503 : }
4504 :
4505 38 : if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
4506 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
4507 38 : disk->acwu = 0;
4508 0 : } else if (nsdata->nsfeat.ns_atomic_write_unit) {
4509 0 : disk->acwu = nsdata->nacwu + 1; /* 0-based */
4510 : } else {
4511 0 : disk->acwu = cdata->acwu + 1; /* 0-based */
4512 : }
4513 :
4514 38 : if (cdata->oncs.copy) {
4515 : /* For now bdev interface allows only single segment copy */
4516 0 : disk->max_copy = nsdata->mssrl;
4517 : }
4518 :
4519 38 : disk->ctxt = ctx;
4520 38 : disk->fn_table = &nvmelib_fn_table;
4521 38 : disk->module = &nvme_if;
4522 :
4523 38 : disk->numa.id_valid = 1;
4524 38 : disk->numa.id = spdk_nvme_ctrlr_get_numa_id(ctrlr);
4525 :
4526 38 : return 0;
4527 : }
4528 :
4529 : static struct nvme_bdev *
4530 38 : nvme_bdev_alloc(void)
4531 : {
4532 : struct nvme_bdev *bdev;
4533 : int rc;
4534 :
4535 38 : bdev = calloc(1, sizeof(*bdev));
4536 38 : if (!bdev) {
4537 0 : SPDK_ERRLOG("bdev calloc() failed\n");
4538 0 : return NULL;
4539 : }
4540 :
4541 38 : if (g_opts.nvme_error_stat) {
4542 0 : bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
4543 0 : if (!bdev->err_stat) {
4544 0 : SPDK_ERRLOG("err_stat calloc() failed\n");
4545 0 : free(bdev);
4546 0 : return NULL;
4547 : }
4548 : }
4549 :
4550 38 : rc = pthread_mutex_init(&bdev->mutex, NULL);
4551 38 : if (rc != 0) {
4552 0 : free(bdev->err_stat);
4553 0 : free(bdev);
4554 0 : return NULL;
4555 : }
4556 :
4557 38 : bdev->ref = 1;
4558 38 : bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
4559 38 : bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
4560 38 : bdev->rr_min_io = UINT32_MAX;
4561 38 : TAILQ_INIT(&bdev->nvme_ns_list);
4562 :
4563 38 : return bdev;
4564 : }
4565 :
4566 : static int
4567 38 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4568 : {
4569 : struct nvme_bdev *bdev;
4570 38 : struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
4571 : int rc;
4572 :
4573 38 : bdev = nvme_bdev_alloc();
4574 38 : if (bdev == NULL) {
4575 0 : SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
4576 0 : return -ENOMEM;
4577 : }
4578 :
4579 38 : bdev->opal = nvme_ctrlr->opal_dev != NULL;
4580 :
4581 38 : rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
4582 : nvme_ns->ns, &nvme_ctrlr->opts, bdev);
4583 38 : if (rc != 0) {
4584 0 : SPDK_ERRLOG("Failed to create NVMe disk\n");
4585 0 : nvme_bdev_free(bdev);
4586 0 : return rc;
4587 : }
4588 :
4589 38 : spdk_io_device_register(bdev,
4590 : bdev_nvme_create_bdev_channel_cb,
4591 : bdev_nvme_destroy_bdev_channel_cb,
4592 : sizeof(struct nvme_bdev_channel),
4593 38 : bdev->disk.name);
4594 :
4595 38 : nvme_ns->bdev = bdev;
4596 38 : bdev->nsid = nvme_ns->id;
4597 38 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4598 :
4599 38 : bdev->nbdev_ctrlr = nbdev_ctrlr;
4600 38 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);
4601 :
4602 38 : rc = spdk_bdev_register(&bdev->disk);
4603 38 : if (rc != 0) {
4604 1 : SPDK_ERRLOG("spdk_bdev_register() failed\n");
4605 1 : spdk_io_device_unregister(bdev, NULL);
4606 1 : nvme_ns->bdev = NULL;
4607 1 : TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
4608 1 : nvme_bdev_free(bdev);
4609 1 : return rc;
4610 : }
4611 :
4612 37 : return 0;
4613 : }
4614 :
4615 : static bool
4616 23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
4617 : {
4618 : const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
4619 : const struct spdk_uuid *uuid1, *uuid2;
4620 :
4621 23 : nsdata1 = spdk_nvme_ns_get_data(ns1);
4622 23 : nsdata2 = spdk_nvme_ns_get_data(ns2);
4623 23 : uuid1 = spdk_nvme_ns_get_uuid(ns1);
4624 23 : uuid2 = spdk_nvme_ns_get_uuid(ns2);
4625 :
4626 45 : return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
4627 22 : nsdata1->eui64 == nsdata2->eui64 &&
4628 21 : ((uuid1 == NULL && uuid2 == NULL) ||
4629 59 : (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
4630 18 : spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
4631 : }
4632 :
4633 : static bool
4634 0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
4635 : struct spdk_nvme_ctrlr_opts *opts)
4636 : {
4637 : struct nvme_probe_skip_entry *entry;
4638 :
4639 0 : TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
4640 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
4641 0 : return false;
4642 : }
4643 : }
4644 :
4645 0 : opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
4646 0 : opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
4647 0 : opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
4648 0 : opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
4649 0 : opts->disable_read_ana_log_page = true;
4650 :
4651 0 : SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
4652 :
4653 0 : return true;
4654 : }
4655 :
4656 : static void
4657 0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
4658 : {
4659 0 : struct nvme_ctrlr *nvme_ctrlr = ctx;
4660 :
4661 0 : if (spdk_nvme_cpl_is_error(cpl)) {
4662 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "Abort failed. Resetting controller. sc is %u, sct is %u.\n",
4663 : cpl->status.sc, cpl->status.sct);
4664 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4665 0 : } else if (cpl->cdw0 & 0x1) {
4666 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "Specified command could not be aborted.\n");
4667 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4668 : }
4669 0 : }
4670 :
4671 : static void
4672 0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
4673 : struct spdk_nvme_qpair *qpair, uint16_t cid)
4674 : {
4675 0 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
4676 : union spdk_nvme_csts_register csts;
4677 : int rc;
4678 :
4679 0 : assert(nvme_ctrlr->ctrlr == ctrlr);
4680 :
4681 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n",
4682 : ctrlr, qpair, cid);
4683 :
4684 : /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
4685 : * queue. (Note: qpair == NULL when there's an admin cmd timeout.) Otherwise we
4686 : * would submit another fabrics cmd on the admin queue to read CSTS and check for its
4687 : * completion recursively.
4688 : */
4689 0 : if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
4690 0 : csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
4691 0 : if (csts.bits.cfs) {
4692 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Controller Fatal Status, reset required\n");
4693 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4694 0 : return;
4695 : }
4696 : }
4697 :
4698 0 : switch (g_opts.action_on_timeout) {
4699 0 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
4700 0 : if (qpair) {
4701 : /* Don't send abort to ctrlr when ctrlr is not available. */
4702 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4703 0 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
4704 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4705 0 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Quit abort. Ctrlr is not available.\n");
4706 0 : return;
4707 : }
4708 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4709 :
4710 0 : rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
4711 : nvme_abort_cpl, nvme_ctrlr);
4712 0 : if (rc == 0) {
4713 0 : return;
4714 : }
4715 :
4716 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to send abort. Resetting, rc is %d.\n", rc);
4717 : }
4718 :
4719 : /* FALLTHROUGH */
4720 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
4721 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4722 0 : break;
4723 0 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
4724 0 : NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "No action for nvme controller timeout.\n");
4725 0 : break;
4726 0 : default:
4727 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "An invalid timeout action value is found.\n");
4728 0 : break;
4729 : }
4730 : }
4731 :
4732 : static struct nvme_ns *
4733 51 : nvme_ns_alloc(void)
4734 : {
4735 : struct nvme_ns *nvme_ns;
4736 :
4737 51 : nvme_ns = calloc(1, sizeof(struct nvme_ns));
4738 51 : if (nvme_ns == NULL) {
4739 0 : return NULL;
4740 : }
4741 :
4742 51 : if (g_opts.io_path_stat) {
4743 0 : nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
4744 0 : if (nvme_ns->stat == NULL) {
4745 0 : free(nvme_ns);
4746 0 : return NULL;
4747 : }
4748 0 : spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
4749 : }
4750 :
4751 51 : return nvme_ns;
4752 : }
4753 :
4754 : static void
4755 51 : nvme_ns_free(struct nvme_ns *nvme_ns)
4756 : {
4757 51 : free(nvme_ns->stat);
4758 51 : free(nvme_ns);
4759 51 : }
4760 :
4761 : static void
4762 51 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
4763 : {
4764 51 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4765 51 : struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
4766 :
4767 51 : if (rc == 0) {
4768 49 : nvme_ns->probe_ctx = NULL;
4769 49 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4770 49 : nvme_ctrlr->ref++;
4771 49 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4772 : } else {
4773 2 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4774 2 : nvme_ns_free(nvme_ns);
4775 : }
4776 :
4777 51 : if (ctx) {
4778 50 : ctx->populates_in_progress--;
4779 50 : if (ctx->populates_in_progress == 0) {
4780 12 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
4781 : }
4782 : }
4783 51 : }
4784 :
/*
 * Per-channel step used with nvme_bdev_for_each_channel(): add an I/O path
 * for the namespace passed in ctx to this bdev channel, then continue the
 * iteration with the result so a failure is propagated to the completion
 * callback.
 */
static void
bdev_nvme_add_io_path(struct nvme_bdev_channel_iter *i,
		      struct nvme_bdev *nbdev,
		      struct nvme_bdev_channel *nbdev_ch, void *ctx)
{
	struct nvme_ns *ns_entry = ctx;
	int status = _bdev_nvme_add_io_path(nbdev_ch, ns_entry);

	if (status != 0) {
		SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
	}

	nvme_bdev_for_each_channel_continue(i, status);
}
4800 :
/*
 * Per-channel step used with nvme_bdev_for_each_channel(): delete this
 * channel's I/O path for the namespace passed in ctx, if it has one.
 * The iteration always continues with status 0.
 */
static void
bdev_nvme_delete_io_path(struct nvme_bdev_channel_iter *i,
			 struct nvme_bdev *nbdev,
			 struct nvme_bdev_channel *nbdev_ch, void *ctx)
{
	struct nvme_ns *ns_entry = ctx;
	struct nvme_io_path *path = _bdev_nvme_get_io_path(nbdev_ch, ns_entry);

	if (path != NULL) {
		_bdev_nvme_delete_io_path(nbdev_ch, path);
	}

	nvme_bdev_for_each_channel_continue(i, 0);
}
4816 :
/* Completion callback used after the partially added I/O paths have been
 * deleted again: report the namespace (ctx is a struct nvme_ns *) as failed
 * to populate. The nbdev and status arguments are not used.
 */
static void
bdev_nvme_add_io_path_failed(struct nvme_bdev *nbdev, void *ctx, int status)
{
	nvme_ctrlr_populate_namespace_done(ctx, -1);
}
4824 :
4825 : static void
4826 12 : bdev_nvme_add_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
4827 : {
4828 12 : struct nvme_ns *nvme_ns = ctx;
4829 :
4830 12 : if (status == 0) {
4831 12 : nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
4832 : } else {
4833 : /* Delete the added io_paths and fail populating the namespace. */
4834 0 : nvme_bdev_for_each_channel(nbdev,
4835 : bdev_nvme_delete_io_path,
4836 : nvme_ns,
4837 : bdev_nvme_add_io_path_failed);
4838 : }
4839 12 : }
4840 :
4841 : static int
4842 13 : nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
4843 : {
4844 : struct nvme_ns *tmp_ns;
4845 : const struct spdk_nvme_ns_data *nsdata;
4846 :
4847 13 : nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
4848 13 : if (!nsdata->nmic.can_share) {
4849 0 : SPDK_ERRLOG("Namespace cannot be shared.\n");
4850 0 : return -EINVAL;
4851 : }
4852 :
4853 13 : pthread_mutex_lock(&bdev->mutex);
4854 :
4855 13 : tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
4856 13 : assert(tmp_ns != NULL);
4857 :
4858 13 : if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
4859 1 : pthread_mutex_unlock(&bdev->mutex);
4860 1 : SPDK_ERRLOG("Namespaces are not identical.\n");
4861 1 : return -EINVAL;
4862 : }
4863 :
4864 12 : bdev->ref++;
4865 12 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4866 12 : nvme_ns->bdev = bdev;
4867 :
4868 12 : pthread_mutex_unlock(&bdev->mutex);
4869 :
4870 : /* Add nvme_io_path to nvme_bdev_channels dynamically. */
4871 12 : nvme_bdev_for_each_channel(bdev,
4872 : bdev_nvme_add_io_path,
4873 : nvme_ns,
4874 : bdev_nvme_add_io_path_done);
4875 :
4876 12 : return 0;
4877 : }
4878 :
4879 : static void
4880 51 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4881 : {
4882 : struct spdk_nvme_ns *ns;
4883 : struct nvme_bdev *bdev;
4884 51 : int rc = 0;
4885 :
4886 51 : ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
4887 51 : if (!ns) {
4888 0 : NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "Invalid NS %d\n", nvme_ns->id);
4889 0 : rc = -EINVAL;
4890 0 : goto done;
4891 : }
4892 :
4893 51 : nvme_ns->ns = ns;
4894 51 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
4895 :
4896 51 : if (nvme_ctrlr->ana_log_page != NULL) {
4897 37 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
4898 : }
4899 :
4900 51 : bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
4901 51 : if (bdev == NULL) {
4902 38 : rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
4903 : } else {
4904 13 : rc = nvme_bdev_add_ns(bdev, nvme_ns);
4905 13 : if (rc == 0) {
4906 12 : return;
4907 : }
4908 : }
4909 1 : done:
4910 39 : nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
4911 : }
4912 :
4913 : static void
4914 49 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
4915 : {
4916 49 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4917 :
4918 49 : assert(nvme_ctrlr != NULL);
4919 :
4920 49 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4921 :
4922 49 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4923 :
4924 49 : if (nvme_ns->bdev != NULL) {
4925 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4926 0 : return;
4927 : }
4928 :
4929 49 : nvme_ns_free(nvme_ns);
4930 49 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4931 :
4932 49 : nvme_ctrlr_release(nvme_ctrlr);
4933 : }
4934 :
/* Completion callback for deleting a namespace's I/O paths from every channel:
 * finish depopulating the namespace passed in ctx (a struct nvme_ns *).
 * The nbdev and status arguments are not used.
 */
static void
bdev_nvme_delete_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
{
	nvme_ctrlr_depopulate_namespace_done(ctx);
}
4942 :
4943 : static void
4944 49 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4945 : {
4946 : struct nvme_bdev *bdev;
4947 :
4948 49 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4949 :
4950 49 : bdev = nvme_ns->bdev;
4951 49 : if (bdev != NULL) {
4952 45 : pthread_mutex_lock(&bdev->mutex);
4953 :
4954 45 : assert(bdev->ref > 0);
4955 45 : bdev->ref--;
4956 45 : if (bdev->ref == 0) {
4957 34 : pthread_mutex_unlock(&bdev->mutex);
4958 :
4959 34 : spdk_bdev_unregister(&bdev->disk, NULL, NULL);
4960 : } else {
4961 : /* spdk_bdev_unregister() is not called until the last nvme_ns is
4962 : * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
4963 : * and clear nvme_ns->bdev here.
4964 : */
4965 11 : TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
4966 11 : nvme_ns->bdev = NULL;
4967 :
4968 11 : pthread_mutex_unlock(&bdev->mutex);
4969 :
4970 : /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
4971 : * we call depopulate_namespace_done() to avoid use-after-free.
4972 : */
4973 11 : nvme_bdev_for_each_channel(bdev,
4974 : bdev_nvme_delete_io_path,
4975 : nvme_ns,
4976 : bdev_nvme_delete_io_path_done);
4977 11 : return;
4978 : }
4979 : }
4980 :
4981 38 : nvme_ctrlr_depopulate_namespace_done(nvme_ns);
4982 : }
4983 :
4984 : static void
4985 62 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
4986 : struct nvme_async_probe_ctx *ctx)
4987 : {
4988 62 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
4989 : struct nvme_ns *nvme_ns, *next;
4990 : struct spdk_nvme_ns *ns;
4991 : struct nvme_bdev *bdev;
4992 : uint32_t nsid;
4993 : int rc;
4994 : uint64_t num_sectors;
4995 :
4996 62 : if (ctx) {
4997 : /* Initialize this count to 1 to handle the populate functions
4998 : * calling nvme_ctrlr_populate_namespace_done() immediately.
4999 : */
5000 46 : ctx->populates_in_progress = 1;
5001 : }
5002 :
5003 : /* First loop over our existing namespaces and see if they have been
5004 : * removed. */
5005 62 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5006 66 : while (nvme_ns != NULL) {
5007 4 : next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5008 :
5009 4 : if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
5010 : /* NS is still there or added again. Its attributes may have changed. */
5011 3 : ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
5012 3 : if (nvme_ns->ns != ns) {
5013 1 : assert(nvme_ns->ns == NULL);
5014 1 : nvme_ns->ns = ns;
5015 1 : NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "NSID %u was added\n", nvme_ns->id);
5016 : }
5017 :
5018 3 : num_sectors = spdk_nvme_ns_get_num_sectors(ns);
5019 3 : bdev = nvme_ns->bdev;
5020 3 : assert(bdev != NULL);
5021 3 : if (bdev->disk.blockcnt != num_sectors) {
5022 1 : NVME_CTRLR_NOTICELOG(nvme_ctrlr,
5023 : "NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
5024 : nvme_ns->id,
5025 : bdev->disk.name,
5026 : bdev->disk.blockcnt,
5027 : num_sectors);
5028 1 : rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
5029 1 : if (rc != 0) {
5030 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr,
5031 : "Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
5032 : bdev->disk.name, rc);
5033 : }
5034 : }
5035 : } else {
5036 : /* Namespace was removed */
5037 1 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
5038 : }
5039 :
5040 4 : nvme_ns = next;
5041 : }
5042 :
5043 : /* Loop through all of the namespaces at the nvme level and see if any of them are new */
5044 62 : nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
5045 116 : while (nsid != 0) {
5046 54 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
5047 :
5048 54 : if (nvme_ns == NULL) {
5049 : /* Found a new one */
5050 51 : nvme_ns = nvme_ns_alloc();
5051 51 : if (nvme_ns == NULL) {
5052 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate namespace\n");
5053 : /* This just fails to attach the namespace. It may work on a future attempt. */
5054 0 : continue;
5055 : }
5056 :
5057 51 : nvme_ns->id = nsid;
5058 51 : nvme_ns->ctrlr = nvme_ctrlr;
5059 :
5060 51 : nvme_ns->bdev = NULL;
5061 :
5062 51 : if (ctx) {
5063 50 : ctx->populates_in_progress++;
5064 : }
5065 51 : nvme_ns->probe_ctx = ctx;
5066 :
5067 51 : RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
5068 :
5069 51 : nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
5070 : }
5071 :
5072 54 : nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
5073 : }
5074 :
5075 62 : if (ctx) {
5076 : /* Decrement this count now that the loop is over to account
5077 : * for the one we started with. If the count is then 0, we
5078 : * know any populate_namespace functions completed immediately,
5079 : * so we'll kick the callback here.
5080 : */
5081 46 : ctx->populates_in_progress--;
5082 46 : if (ctx->populates_in_progress == 0) {
5083 34 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
5084 : }
5085 : }
5086 :
5087 62 : }
5088 :
5089 : static void
5090 61 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
5091 : {
5092 : struct nvme_ns *nvme_ns, *tmp;
5093 :
5094 109 : RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
5095 48 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
5096 : }
5097 61 : }
5098 :
5099 : static uint32_t
5100 36 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
5101 : {
5102 36 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5103 : const struct spdk_nvme_ctrlr_data *cdata;
5104 36 : uint32_t nsid, ns_count = 0;
5105 :
5106 36 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5107 :
5108 36 : for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
5109 80 : nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
5110 44 : ns_count++;
5111 : }
5112 :
5113 36 : return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
5114 36 : sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
5115 : sizeof(uint32_t);
5116 : }
5117 :
5118 : static int
5119 7 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
5120 : void *cb_arg)
5121 : {
5122 7 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
5123 : struct nvme_ns *nvme_ns;
5124 : uint32_t i, nsid;
5125 :
5126 13 : for (i = 0; i < desc->num_of_nsid; i++) {
5127 6 : nsid = desc->nsid[i];
5128 6 : if (nsid == 0) {
5129 0 : continue;
5130 : }
5131 :
5132 6 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
5133 :
5134 6 : if (nvme_ns == NULL) {
5135 : /* Target told us that an inactive namespace had an ANA change */
5136 1 : continue;
5137 : }
5138 :
5139 5 : _nvme_ns_set_ana_state(nvme_ns, desc);
5140 : }
5141 :
5142 7 : return 0;
5143 : }
5144 :
5145 : static void
5146 0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
5147 : {
5148 : struct nvme_ns *nvme_ns;
5149 :
5150 0 : spdk_free(nvme_ctrlr->ana_log_page);
5151 0 : nvme_ctrlr->ana_log_page = NULL;
5152 :
5153 0 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5154 0 : nvme_ns != NULL;
5155 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
5156 0 : nvme_ns->ana_state_updating = false;
5157 0 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
5158 : }
5159 0 : }
5160 :
5161 : static void
5162 3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
5163 : {
5164 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
5165 :
5166 3 : if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
5167 3 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
5168 : nvme_ctrlr);
5169 : } else {
5170 0 : bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
5171 : }
5172 :
5173 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5174 :
5175 3 : assert(nvme_ctrlr->ana_log_page_updating == true);
5176 3 : nvme_ctrlr->ana_log_page_updating = false;
5177 :
5178 3 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
5179 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5180 :
5181 0 : nvme_ctrlr_unregister(nvme_ctrlr);
5182 : } else {
5183 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5184 :
5185 3 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
5186 : }
5187 3 : }
5188 :
5189 : static int
5190 6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
5191 : {
5192 : uint32_t ana_log_page_size;
5193 : int rc;
5194 :
5195 6 : if (nvme_ctrlr->ana_log_page == NULL) {
5196 0 : return -EINVAL;
5197 : }
5198 :
5199 6 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
5200 :
5201 6 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
5202 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr,
5203 : "ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
5204 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
5205 0 : return -EINVAL;
5206 : }
5207 :
5208 6 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5209 6 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
5210 : nvme_ctrlr->ana_log_page_updating) {
5211 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5212 3 : return -EBUSY;
5213 : }
5214 :
5215 3 : nvme_ctrlr->ana_log_page_updating = true;
5216 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5217 :
5218 3 : rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
5219 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
5220 : SPDK_NVME_GLOBAL_NS_TAG,
5221 3 : nvme_ctrlr->ana_log_page,
5222 : ana_log_page_size, 0,
5223 : nvme_ctrlr_read_ana_log_page_done,
5224 : nvme_ctrlr);
5225 3 : if (rc != 0) {
5226 0 : nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
5227 : }
5228 :
5229 3 : return rc;
5230 : }
5231 :
5232 : static void
5233 0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
5234 : {
5235 0 : }
5236 :
5237 : struct bdev_nvme_set_preferred_path_ctx {
5238 : struct spdk_bdev_desc *desc;
5239 : struct nvme_ns *nvme_ns;
5240 : bdev_nvme_set_preferred_path_cb cb_fn;
5241 : void *cb_arg;
5242 : };
5243 :
5244 : static void
5245 3 : bdev_nvme_set_preferred_path_done(struct nvme_bdev *nbdev, void *_ctx, int status)
5246 : {
5247 3 : struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
5248 :
5249 3 : assert(ctx != NULL);
5250 3 : assert(ctx->desc != NULL);
5251 3 : assert(ctx->cb_fn != NULL);
5252 :
5253 3 : spdk_bdev_close(ctx->desc);
5254 :
5255 3 : ctx->cb_fn(ctx->cb_arg, status);
5256 :
5257 3 : free(ctx);
5258 3 : }
5259 :
5260 : static void
5261 2 : _bdev_nvme_set_preferred_path(struct nvme_bdev_channel_iter *i,
5262 : struct nvme_bdev *nbdev,
5263 : struct nvme_bdev_channel *nbdev_ch, void *_ctx)
5264 : {
5265 2 : struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
5266 : struct nvme_io_path *io_path, *prev;
5267 :
5268 2 : prev = NULL;
5269 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
5270 3 : if (io_path->nvme_ns == ctx->nvme_ns) {
5271 2 : break;
5272 : }
5273 1 : prev = io_path;
5274 : }
5275 :
5276 2 : if (io_path != NULL) {
5277 2 : if (prev != NULL) {
5278 1 : STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
5279 1 : STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
5280 : }
5281 :
5282 : /* We can set io_path to nbdev_ch->current_io_path directly here.
5283 : * However, it needs to be conditional. To simplify the code,
5284 : * just clear nbdev_ch->current_io_path and let find_io_path()
5285 : * fill it.
5286 : *
5287 : * Automatic failback may be disabled. Hence even if the io_path is
5288 : * already at the head, clear nbdev_ch->current_io_path.
5289 : */
5290 2 : bdev_nvme_clear_current_io_path(nbdev_ch);
5291 : }
5292 :
5293 2 : nvme_bdev_for_each_channel_continue(i, 0);
5294 2 : }
5295 :
5296 : static struct nvme_ns *
5297 3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
5298 : {
5299 : struct nvme_ns *nvme_ns, *prev;
5300 : const struct spdk_nvme_ctrlr_data *cdata;
5301 :
5302 3 : prev = NULL;
5303 6 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
5304 6 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
5305 :
5306 6 : if (cdata->cntlid == cntlid) {
5307 3 : break;
5308 : }
5309 3 : prev = nvme_ns;
5310 : }
5311 :
5312 3 : if (nvme_ns != NULL && prev != NULL) {
5313 2 : TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
5314 2 : TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
5315 : }
5316 :
5317 3 : return nvme_ns;
5318 : }
5319 :
5320 : /* This function supports only multipath mode. There is only a single I/O path
5321 : * for each NVMe-oF controller. Hence, just move the matched I/O path to the
5322 : * head of the I/O path list for each NVMe bdev channel.
5323 : *
5324 : * NVMe bdev channel may be acquired after completing this function. move the
5325 : * matched namespace to the head of the namespace list for the NVMe bdev too.
5326 : */
5327 : void
5328 3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
5329 : bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
5330 : {
5331 : struct bdev_nvme_set_preferred_path_ctx *ctx;
5332 : struct spdk_bdev *bdev;
5333 : struct nvme_bdev *nbdev;
5334 3 : int rc = 0;
5335 :
5336 3 : assert(cb_fn != NULL);
5337 :
5338 3 : ctx = calloc(1, sizeof(*ctx));
5339 3 : if (ctx == NULL) {
5340 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5341 0 : rc = -ENOMEM;
5342 0 : goto err_alloc;
5343 : }
5344 :
5345 3 : ctx->cb_fn = cb_fn;
5346 3 : ctx->cb_arg = cb_arg;
5347 :
5348 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5349 3 : if (rc != 0) {
5350 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5351 0 : goto err_open;
5352 : }
5353 :
5354 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5355 :
5356 3 : if (bdev->module != &nvme_if) {
5357 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5358 0 : rc = -ENODEV;
5359 0 : goto err_bdev;
5360 : }
5361 :
5362 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5363 :
5364 3 : pthread_mutex_lock(&nbdev->mutex);
5365 :
5366 3 : ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
5367 3 : if (ctx->nvme_ns == NULL) {
5368 0 : pthread_mutex_unlock(&nbdev->mutex);
5369 :
5370 0 : SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
5371 0 : rc = -ENODEV;
5372 0 : goto err_bdev;
5373 : }
5374 :
5375 3 : pthread_mutex_unlock(&nbdev->mutex);
5376 :
5377 3 : nvme_bdev_for_each_channel(nbdev,
5378 : _bdev_nvme_set_preferred_path,
5379 : ctx,
5380 : bdev_nvme_set_preferred_path_done);
5381 3 : return;
5382 :
5383 0 : err_bdev:
5384 0 : spdk_bdev_close(ctx->desc);
5385 0 : err_open:
5386 0 : free(ctx);
5387 0 : err_alloc:
5388 0 : cb_fn(cb_arg, rc);
5389 : }
5390 :
5391 : struct bdev_nvme_set_multipath_policy_ctx {
5392 : struct spdk_bdev_desc *desc;
5393 : spdk_bdev_nvme_set_multipath_policy_cb cb_fn;
5394 : void *cb_arg;
5395 : };
5396 :
5397 : static void
5398 3 : bdev_nvme_set_multipath_policy_done(struct nvme_bdev *nbdev, void *_ctx, int status)
5399 : {
5400 3 : struct bdev_nvme_set_multipath_policy_ctx *ctx = _ctx;
5401 :
5402 3 : assert(ctx != NULL);
5403 3 : assert(ctx->desc != NULL);
5404 3 : assert(ctx->cb_fn != NULL);
5405 :
5406 3 : spdk_bdev_close(ctx->desc);
5407 :
5408 3 : ctx->cb_fn(ctx->cb_arg, status);
5409 :
5410 3 : free(ctx);
5411 3 : }
5412 :
5413 : static void
5414 1 : _bdev_nvme_set_multipath_policy(struct nvme_bdev_channel_iter *i,
5415 : struct nvme_bdev *nbdev,
5416 : struct nvme_bdev_channel *nbdev_ch, void *ctx)
5417 : {
5418 1 : nbdev_ch->mp_policy = nbdev->mp_policy;
5419 1 : nbdev_ch->mp_selector = nbdev->mp_selector;
5420 1 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
5421 1 : bdev_nvme_clear_current_io_path(nbdev_ch);
5422 :
5423 1 : nvme_bdev_for_each_channel_continue(i, 0);
5424 1 : }
5425 :
5426 : void
5427 3 : spdk_bdev_nvme_set_multipath_policy(const char *name, enum spdk_bdev_nvme_multipath_policy policy,
5428 : enum spdk_bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
5429 : spdk_bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
5430 : {
5431 : struct bdev_nvme_set_multipath_policy_ctx *ctx;
5432 : struct spdk_bdev *bdev;
5433 : struct nvme_bdev *nbdev;
5434 : int rc;
5435 :
5436 3 : assert(cb_fn != NULL);
5437 :
5438 3 : switch (policy) {
5439 1 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
5440 1 : break;
5441 2 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
5442 : switch (selector) {
5443 1 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
5444 1 : if (rr_min_io == UINT32_MAX) {
5445 0 : rr_min_io = 1;
5446 1 : } else if (rr_min_io == 0) {
5447 0 : rc = -EINVAL;
5448 0 : goto exit;
5449 : }
5450 1 : break;
5451 1 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
5452 1 : break;
5453 0 : default:
5454 0 : rc = -EINVAL;
5455 0 : goto exit;
5456 : }
5457 2 : break;
5458 0 : default:
5459 0 : rc = -EINVAL;
5460 0 : goto exit;
5461 : }
5462 :
5463 3 : ctx = calloc(1, sizeof(*ctx));
5464 3 : if (ctx == NULL) {
5465 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5466 0 : rc = -ENOMEM;
5467 0 : goto exit;
5468 : }
5469 :
5470 3 : ctx->cb_fn = cb_fn;
5471 3 : ctx->cb_arg = cb_arg;
5472 :
5473 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5474 3 : if (rc != 0) {
5475 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5476 0 : rc = -ENODEV;
5477 0 : goto err_open;
5478 : }
5479 :
5480 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5481 3 : if (bdev->module != &nvme_if) {
5482 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5483 0 : rc = -ENODEV;
5484 0 : goto err_module;
5485 : }
5486 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5487 :
5488 3 : pthread_mutex_lock(&nbdev->mutex);
5489 3 : nbdev->mp_policy = policy;
5490 3 : nbdev->mp_selector = selector;
5491 3 : nbdev->rr_min_io = rr_min_io;
5492 3 : pthread_mutex_unlock(&nbdev->mutex);
5493 :
5494 3 : nvme_bdev_for_each_channel(nbdev,
5495 : _bdev_nvme_set_multipath_policy,
5496 : ctx,
5497 : bdev_nvme_set_multipath_policy_done);
5498 3 : return;
5499 :
5500 0 : err_module:
5501 0 : spdk_bdev_close(ctx->desc);
5502 0 : err_open:
5503 0 : free(ctx);
5504 0 : exit:
5505 0 : cb_fn(cb_arg, rc);
5506 : }
5507 :
5508 : static void
5509 3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
5510 : {
5511 3 : struct nvme_ctrlr *nvme_ctrlr = arg;
5512 : union spdk_nvme_async_event_completion event;
5513 :
5514 3 : if (spdk_nvme_cpl_is_error(cpl)) {
5515 0 : SPDK_WARNLOG("AER request execute failed\n");
5516 0 : return;
5517 : }
5518 :
5519 3 : event.raw = cpl->cdw0;
5520 3 : if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5521 3 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
5522 2 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
5523 1 : } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5524 1 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
5525 1 : nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
5526 : }
5527 : }
5528 :
5529 : static void
5530 52 : free_nvme_async_probe_ctx(struct nvme_async_probe_ctx *ctx)
5531 : {
5532 52 : spdk_keyring_put_key(ctx->drv_opts.tls_psk);
5533 52 : spdk_keyring_put_key(ctx->drv_opts.dhchap_key);
5534 52 : spdk_keyring_put_key(ctx->drv_opts.dhchap_ctrlr_key);
5535 52 : free(ctx);
5536 52 : }
5537 :
5538 : static void
5539 52 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
5540 : {
5541 52 : if (ctx->cb_fn) {
5542 52 : ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
5543 : }
5544 :
5545 52 : ctx->namespaces_populated = true;
5546 52 : if (ctx->probe_done) {
5547 : /* The probe was already completed, so we need to free the context
5548 : * here. This can happen for cases like OCSSD, where we need to
5549 : * send additional commands to the SSD after attach.
5550 : */
5551 31 : free_nvme_async_probe_ctx(ctx);
5552 : }
5553 52 : }
5554 :
5555 : static int
5556 20 : bdev_nvme_remove_poller(void *ctx)
5557 : {
5558 20 : struct spdk_nvme_transport_id trid_pcie;
5559 :
5560 20 : if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
5561 1 : spdk_poller_unregister(&g_hotplug_poller);
5562 1 : return SPDK_POLLER_IDLE;
5563 : }
5564 :
5565 19 : memset(&trid_pcie, 0, sizeof(trid_pcie));
5566 19 : spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
5567 :
5568 19 : if (spdk_nvme_scan_attached(&trid_pcie)) {
5569 0 : SPDK_ERRLOG_RATELIMIT("spdk_nvme_scan_attached() failed\n");
5570 : }
5571 :
5572 19 : return SPDK_POLLER_BUSY;
5573 : }
5574 :
5575 : static void
5576 60 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
5577 : struct nvme_async_probe_ctx *ctx)
5578 : {
5579 60 : struct spdk_nvme_transport_id *trid = &nvme_ctrlr->active_path_id->trid;
5580 :
5581 60 : if (spdk_nvme_trtype_is_fabrics(trid->trtype)) {
5582 60 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was created to %s:%s\n",
5583 : trid->traddr, trid->trsvcid);
5584 : } else {
5585 0 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was created\n");
5586 : }
5587 :
5588 60 : spdk_io_device_register(nvme_ctrlr,
5589 : bdev_nvme_create_ctrlr_channel_cb,
5590 : bdev_nvme_destroy_ctrlr_channel_cb,
5591 : sizeof(struct nvme_ctrlr_channel),
5592 60 : nvme_ctrlr->nbdev_ctrlr->name);
5593 :
5594 60 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
5595 :
5596 60 : if (g_hotplug_poller == NULL) {
5597 2 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
5598 : NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
5599 : }
5600 60 : }
5601 :
5602 : static void
5603 30 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
5604 : {
5605 30 : struct nvme_ctrlr *nvme_ctrlr = _ctx;
5606 30 : struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
5607 :
5608 30 : nvme_ctrlr->probe_ctx = NULL;
5609 :
5610 30 : if (spdk_nvme_cpl_is_error(cpl)) {
5611 0 : nvme_ctrlr_delete(nvme_ctrlr);
5612 :
5613 0 : if (ctx != NULL) {
5614 0 : ctx->reported_bdevs = 0;
5615 0 : populate_namespaces_cb(ctx, -1);
5616 : }
5617 0 : return;
5618 : }
5619 :
5620 30 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5621 : }
5622 :
5623 : static int
5624 30 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
5625 : struct nvme_async_probe_ctx *ctx)
5626 : {
5627 30 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5628 : const struct spdk_nvme_ctrlr_data *cdata;
5629 : uint32_t ana_log_page_size;
5630 :
5631 30 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5632 :
5633 : /* Set buffer size enough to include maximum number of allowed namespaces. */
5634 30 : ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
5635 30 : sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
5636 : sizeof(uint32_t);
5637 :
5638 30 : nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
5639 : SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
5640 30 : if (nvme_ctrlr->ana_log_page == NULL) {
5641 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "could not allocate ANA log page buffer\n");
5642 0 : return -ENXIO;
5643 : }
5644 :
5645 : /* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned.
5646 : * Hence copy each descriptor to a temporary area when parsing it.
5647 : *
5648 : * Allocate a buffer whose size is as large as ANA log page buffer because
5649 : * we do not know the size of a descriptor until actually reading it.
5650 : */
5651 30 : nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
5652 30 : if (nvme_ctrlr->copied_ana_desc == NULL) {
5653 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "could not allocate a buffer to parse ANA descriptor\n");
5654 0 : return -ENOMEM;
5655 : }
5656 :
5657 30 : nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
5658 :
5659 30 : nvme_ctrlr->probe_ctx = ctx;
5660 :
5661 : /* Then, set the read size only to include the current active namespaces. */
5662 30 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
5663 :
5664 30 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
5665 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
5666 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
5667 0 : return -EINVAL;
5668 : }
5669 :
5670 30 : return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
5671 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
5672 : SPDK_NVME_GLOBAL_NS_TAG,
5673 30 : nvme_ctrlr->ana_log_page,
5674 : ana_log_page_size, 0,
5675 : nvme_ctrlr_init_ana_log_page_done,
5676 : nvme_ctrlr);
5677 : }
5678 :
5679 : /* hostnqn and subnqn were already verified before attaching a controller.
5680 : * Hence check only the multipath capability and cntlid here.
5681 : */
5682 : static bool
5683 16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
5684 : {
5685 : struct nvme_ctrlr *tmp;
5686 : const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
5687 :
5688 16 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5689 :
5690 16 : if (!cdata->cmic.multi_ctrlr) {
5691 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5692 0 : return false;
5693 : }
5694 :
5695 33 : TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
5696 18 : tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
5697 :
5698 18 : if (!tmp_cdata->cmic.multi_ctrlr) {
5699 0 : NVME_CTRLR_ERRLOG(tmp, "Ctrlr%u does not support multipath.\n", cdata->cntlid);
5700 0 : return false;
5701 : }
5702 18 : if (cdata->cntlid == tmp_cdata->cntlid) {
5703 1 : NVME_CTRLR_ERRLOG(tmp, "cntlid %u are duplicated.\n", tmp_cdata->cntlid);
5704 1 : return false;
5705 : }
5706 : }
5707 :
5708 15 : return true;
5709 : }
5710 :
5711 :
5712 : static int
5713 61 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
5714 : {
5715 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
5716 61 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5717 : struct nvme_ctrlr *nctrlr;
5718 61 : int rc = 0;
5719 :
5720 61 : pthread_mutex_lock(&g_bdev_nvme_mutex);
5721 :
5722 61 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
5723 61 : if (nbdev_ctrlr != NULL) {
5724 16 : if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
5725 1 : rc = -EINVAL;
5726 1 : goto exit;
5727 : }
5728 32 : TAILQ_FOREACH(nctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
5729 17 : if (nctrlr->opts.multipath != nvme_ctrlr->opts.multipath) {
5730 : /* All controllers with the same name must be configured the same
5731 : * way, either for multipath or failover. If the configuration doesn't
5732 : * match - report error.
5733 : */
5734 0 : rc = -EINVAL;
5735 0 : goto exit;
5736 : }
5737 : }
5738 : } else {
5739 45 : nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
5740 45 : if (nbdev_ctrlr == NULL) {
5741 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate nvme_bdev_ctrlr.\n");
5742 0 : rc = -ENOMEM;
5743 0 : goto exit;
5744 : }
5745 45 : nbdev_ctrlr->name = strdup(name);
5746 45 : if (nbdev_ctrlr->name == NULL) {
5747 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate name of nvme_bdev_ctrlr.\n");
5748 0 : free(nbdev_ctrlr);
5749 0 : goto exit;
5750 : }
5751 45 : TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
5752 45 : TAILQ_INIT(&nbdev_ctrlr->bdevs);
5753 45 : TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
5754 : }
5755 60 : nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
5756 60 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
5757 61 : exit:
5758 61 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
5759 61 : return rc;
5760 : }
5761 :
5762 : static int
5763 61 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
5764 : const char *name,
5765 : const struct spdk_nvme_transport_id *trid,
5766 : struct nvme_async_probe_ctx *ctx)
5767 : {
5768 : struct nvme_ctrlr *nvme_ctrlr;
5769 : struct nvme_path_id *path_id;
5770 : const struct spdk_nvme_ctrlr_data *cdata;
5771 : int rc;
5772 :
5773 61 : nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
5774 61 : if (nvme_ctrlr == NULL) {
5775 0 : SPDK_ERRLOG("Failed to allocate device struct\n");
5776 0 : return -ENOMEM;
5777 : }
5778 :
5779 61 : rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
5780 61 : if (rc != 0) {
5781 0 : free(nvme_ctrlr);
5782 0 : return rc;
5783 : }
5784 :
5785 61 : TAILQ_INIT(&nvme_ctrlr->trids);
5786 61 : RB_INIT(&nvme_ctrlr->namespaces);
5787 :
5788 : /* Get another reference to the key, so the first one can be released from probe_ctx */
5789 61 : if (ctx != NULL) {
5790 47 : if (ctx->drv_opts.tls_psk != NULL) {
5791 0 : nvme_ctrlr->psk = spdk_keyring_get_key(
5792 : spdk_key_get_name(ctx->drv_opts.tls_psk));
5793 0 : if (nvme_ctrlr->psk == NULL) {
5794 : /* Could only happen if the key was removed in the meantime */
5795 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5796 : spdk_key_get_name(ctx->drv_opts.tls_psk));
5797 0 : rc = -ENOKEY;
5798 0 : goto err;
5799 : }
5800 : }
5801 :
5802 47 : if (ctx->drv_opts.dhchap_key != NULL) {
5803 0 : nvme_ctrlr->dhchap_key = spdk_keyring_get_key(
5804 : spdk_key_get_name(ctx->drv_opts.dhchap_key));
5805 0 : if (nvme_ctrlr->dhchap_key == NULL) {
5806 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5807 : spdk_key_get_name(ctx->drv_opts.dhchap_key));
5808 0 : rc = -ENOKEY;
5809 0 : goto err;
5810 : }
5811 : }
5812 :
5813 47 : if (ctx->drv_opts.dhchap_ctrlr_key != NULL) {
5814 0 : nvme_ctrlr->dhchap_ctrlr_key =
5815 0 : spdk_keyring_get_key(
5816 : spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
5817 0 : if (nvme_ctrlr->dhchap_ctrlr_key == NULL) {
5818 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5819 : spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
5820 0 : rc = -ENOKEY;
5821 0 : goto err;
5822 : }
5823 : }
5824 : }
5825 :
5826 61 : path_id = calloc(1, sizeof(*path_id));
5827 61 : if (path_id == NULL) {
5828 0 : SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
5829 0 : rc = -ENOMEM;
5830 0 : goto err;
5831 : }
5832 :
5833 61 : path_id->trid = *trid;
5834 61 : if (ctx != NULL) {
5835 47 : memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
5836 47 : memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
5837 : }
5838 61 : nvme_ctrlr->active_path_id = path_id;
5839 61 : TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
5840 :
5841 61 : nvme_ctrlr->thread = spdk_get_thread();
5842 61 : nvme_ctrlr->ctrlr = ctrlr;
5843 61 : nvme_ctrlr->ref = 1;
5844 :
5845 61 : if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
5846 0 : SPDK_ERRLOG("OCSSDs are not supported");
5847 0 : rc = -ENOTSUP;
5848 0 : goto err;
5849 : }
5850 :
5851 61 : if (ctx != NULL) {
5852 47 : memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
5853 : } else {
5854 14 : spdk_bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
5855 : }
5856 :
5857 61 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
5858 : g_opts.nvme_adminq_poll_period_us);
5859 :
5860 61 : if (g_opts.timeout_us > 0) {
5861 : /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
5862 : /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
5863 0 : uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
5864 0 : g_opts.timeout_us : g_opts.timeout_admin_us;
5865 0 : spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
5866 : adm_timeout_us, timeout_cb, nvme_ctrlr);
5867 : }
5868 :
5869 61 : spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
5870 61 : spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
5871 :
5872 61 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
5873 : SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
5874 0 : nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
5875 : }
5876 :
5877 61 : rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
5878 61 : if (rc != 0) {
5879 1 : goto err;
5880 : }
5881 :
5882 60 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5883 :
5884 60 : if (cdata->cmic.ana_reporting) {
5885 30 : rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
5886 30 : if (rc == 0) {
5887 30 : return 0;
5888 : }
5889 : } else {
5890 30 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5891 30 : return 0;
5892 : }
5893 :
5894 1 : err:
5895 1 : nvme_ctrlr_delete(nvme_ctrlr);
5896 1 : return rc;
5897 : }
5898 :
5899 : void
5900 33 : spdk_bdev_nvme_get_default_ctrlr_opts(struct spdk_bdev_nvme_ctrlr_opts *opts)
5901 : {
5902 33 : opts->prchk_flags = 0;
5903 33 : opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
5904 33 : opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
5905 33 : opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
5906 33 : opts->multipath = true;
5907 33 : }
5908 :
5909 : static void
5910 0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
5911 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
5912 : {
5913 : char *name;
5914 :
5915 0 : name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
5916 0 : if (!name) {
5917 0 : SPDK_ERRLOG("Failed to assign name to NVMe device\n");
5918 0 : return;
5919 : }
5920 :
5921 0 : if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
5922 0 : SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
5923 : } else {
5924 0 : SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
5925 : }
5926 :
5927 0 : free(name);
5928 : }
5929 :
/*
 * Second half of controller deletion: depopulate the controller's namespaces
 * and drop one reference. The void pointer signature lets it be used as a
 * thread message function (see _bdev_nvme_delete()).
 */
static void
_nvme_ctrlr_destruct(void *ctx)
{
	struct nvme_ctrlr *target = ctx;

	nvme_ctrlr_depopulate_namespaces(target);
	nvme_ctrlr_release(target);
}
5938 :
5939 : static int
5940 57 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
5941 : {
5942 : struct nvme_probe_skip_entry *entry;
5943 :
5944 : /* The controller's destruction was already started */
5945 57 : if (nvme_ctrlr->destruct) {
5946 0 : return -EALREADY;
5947 : }
5948 :
5949 57 : if (!hotplug &&
5950 57 : nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
5951 0 : entry = calloc(1, sizeof(*entry));
5952 0 : if (!entry) {
5953 0 : return -ENOMEM;
5954 : }
5955 0 : entry->trid = nvme_ctrlr->active_path_id->trid;
5956 0 : TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
5957 : }
5958 :
5959 57 : nvme_ctrlr->destruct = true;
5960 57 : return 0;
5961 : }
5962 :
5963 : static int
5964 2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
5965 : {
5966 : int rc;
5967 :
5968 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5969 2 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
5970 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5971 :
5972 2 : if (rc == 0) {
5973 2 : _nvme_ctrlr_destruct(nvme_ctrlr);
5974 0 : } else if (rc == -EALREADY) {
5975 0 : rc = 0;
5976 : }
5977 :
5978 2 : return rc;
5979 : }
5980 :
/*
 * Removal callback registered through spdk_nvme_ctrlr_set_remove_cb() in
 * nvme_ctrlr_create(); cb_ctx is the nvme_ctrlr. Deletes the controller as a
 * hotplug removal and ignores the result.
 */
static void
remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_ctrlr *removed = cb_ctx;

	(void)bdev_nvme_delete_ctrlr(removed, true);
}
5988 :
5989 : static int
5990 0 : bdev_nvme_hotplug_probe(void *arg)
5991 : {
5992 0 : if (g_hotplug_probe_ctx == NULL) {
5993 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
5994 0 : return SPDK_POLLER_IDLE;
5995 : }
5996 :
5997 0 : if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
5998 0 : g_hotplug_probe_ctx = NULL;
5999 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
6000 : }
6001 :
6002 0 : return SPDK_POLLER_BUSY;
6003 : }
6004 :
6005 : static int
6006 0 : bdev_nvme_hotplug(void *arg)
6007 : {
6008 0 : struct spdk_nvme_transport_id trid_pcie;
6009 :
6010 0 : if (g_hotplug_probe_ctx) {
6011 0 : return SPDK_POLLER_BUSY;
6012 : }
6013 :
6014 0 : memset(&trid_pcie, 0, sizeof(trid_pcie));
6015 0 : spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
6016 :
6017 0 : g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
6018 : hotplug_probe_cb, attach_cb, NULL);
6019 :
6020 0 : if (g_hotplug_probe_ctx) {
6021 0 : assert(g_hotplug_probe_poller == NULL);
6022 0 : g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
6023 : }
6024 :
6025 0 : return SPDK_POLLER_BUSY;
6026 : }
6027 :
6028 : void
6029 0 : bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
6030 : {
6031 0 : *opts = g_opts;
6032 0 : }
6033 :
6034 : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
6035 : uint32_t reconnect_delay_sec,
6036 : uint32_t fast_io_fail_timeout_sec);
6037 :
6038 : static int
6039 0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
6040 : {
6041 0 : if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
6042 : /* Can't set timeout_admin_us without also setting timeout_us */
6043 0 : SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
6044 0 : return -EINVAL;
6045 : }
6046 :
6047 0 : if (opts->bdev_retry_count < -1) {
6048 0 : SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
6049 0 : return -EINVAL;
6050 : }
6051 :
6052 0 : if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
6053 0 : opts->reconnect_delay_sec,
6054 0 : opts->fast_io_fail_timeout_sec)) {
6055 0 : return -EINVAL;
6056 : }
6057 :
6058 0 : return 0;
6059 : }
6060 :
6061 : int
6062 0 : bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
6063 : {
6064 : int ret;
6065 :
6066 0 : ret = bdev_nvme_validate_opts(opts);
6067 0 : if (ret) {
6068 0 : SPDK_WARNLOG("Failed to set nvme opts.\n");
6069 0 : return ret;
6070 : }
6071 :
6072 0 : if (g_bdev_nvme_init_thread != NULL) {
6073 0 : if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
6074 0 : return -EPERM;
6075 : }
6076 : }
6077 :
6078 0 : if (opts->rdma_srq_size != 0 ||
6079 0 : opts->rdma_max_cq_size != 0 ||
6080 0 : opts->rdma_cm_event_timeout_ms != 0) {
6081 0 : struct spdk_nvme_transport_opts drv_opts;
6082 :
6083 0 : spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
6084 0 : if (opts->rdma_srq_size != 0) {
6085 0 : drv_opts.rdma_srq_size = opts->rdma_srq_size;
6086 : }
6087 0 : if (opts->rdma_max_cq_size != 0) {
6088 0 : drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
6089 : }
6090 0 : if (opts->rdma_cm_event_timeout_ms != 0) {
6091 0 : drv_opts.rdma_cm_event_timeout_ms = opts->rdma_cm_event_timeout_ms;
6092 : }
6093 :
6094 0 : ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
6095 0 : if (ret) {
6096 0 : SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
6097 0 : return ret;
6098 : }
6099 : }
6100 :
6101 0 : g_opts = *opts;
6102 :
6103 0 : return 0;
6104 : }
6105 :
6106 : struct set_nvme_hotplug_ctx {
6107 : uint64_t period_us;
6108 : bool enabled;
6109 : spdk_msg_fn fn;
6110 : void *fn_ctx;
6111 : };
6112 :
6113 : static void
6114 0 : set_nvme_hotplug_period_cb(void *_ctx)
6115 : {
6116 0 : struct set_nvme_hotplug_ctx *ctx = _ctx;
6117 :
6118 0 : spdk_poller_unregister(&g_hotplug_poller);
6119 0 : if (ctx->enabled) {
6120 0 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
6121 : } else {
6122 0 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
6123 : NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
6124 : }
6125 :
6126 0 : g_nvme_hotplug_poll_period_us = ctx->period_us;
6127 0 : g_nvme_hotplug_enabled = ctx->enabled;
6128 0 : if (ctx->fn) {
6129 0 : ctx->fn(ctx->fn_ctx);
6130 : }
6131 :
6132 0 : free(ctx);
6133 0 : }
6134 :
6135 : int
6136 0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
6137 : {
6138 : struct set_nvme_hotplug_ctx *ctx;
6139 :
6140 0 : if (enabled == true && !spdk_process_is_primary()) {
6141 0 : return -EPERM;
6142 : }
6143 :
6144 0 : ctx = calloc(1, sizeof(*ctx));
6145 0 : if (ctx == NULL) {
6146 0 : return -ENOMEM;
6147 : }
6148 :
6149 0 : period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
6150 0 : ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
6151 0 : ctx->enabled = enabled;
6152 0 : ctx->fn = cb;
6153 0 : ctx->fn_ctx = cb_ctx;
6154 :
6155 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
6156 0 : return 0;
6157 : }
6158 :
6159 : static void
6160 46 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
6161 : struct nvme_async_probe_ctx *ctx)
6162 : {
6163 : struct nvme_ns *nvme_ns;
6164 : struct nvme_bdev *nvme_bdev;
6165 : size_t j;
6166 :
6167 46 : assert(nvme_ctrlr != NULL);
6168 :
6169 46 : if (ctx->names == NULL) {
6170 0 : ctx->reported_bdevs = 0;
6171 0 : populate_namespaces_cb(ctx, 0);
6172 0 : return;
6173 : }
6174 :
6175 : /*
6176 : * Report the new bdevs that were created in this call.
6177 : * There can be more than one bdev per NVMe controller.
6178 : */
6179 46 : j = 0;
6180 46 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
6181 94 : while (nvme_ns != NULL) {
6182 48 : nvme_bdev = nvme_ns->bdev;
6183 48 : if (j < ctx->max_bdevs) {
6184 48 : ctx->names[j] = nvme_bdev->disk.name;
6185 48 : j++;
6186 : } else {
6187 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr,
6188 : "Maximum number of namespaces supported per NVMe controller is %du. "
6189 : "Unable to return all names of created bdevs\n",
6190 : ctx->max_bdevs);
6191 0 : ctx->reported_bdevs = 0;
6192 0 : populate_namespaces_cb(ctx, -ERANGE);
6193 0 : return;
6194 : }
6195 :
6196 48 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
6197 : }
6198 :
6199 46 : ctx->reported_bdevs = j;
6200 46 : populate_namespaces_cb(ctx, 0);
6201 : }
6202 :
6203 : static int
6204 9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
6205 : struct spdk_nvme_ctrlr *new_ctrlr,
6206 : struct spdk_nvme_transport_id *trid)
6207 : {
6208 : struct nvme_path_id *tmp_trid;
6209 :
6210 9 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
6211 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "PCIe failover is not supported.\n");
6212 0 : return -ENOTSUP;
6213 : }
6214 :
6215 : /* Currently we only support failover to the same transport type. */
6216 9 : if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
6217 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr,
6218 : "Failover from trtype: %s to a different trtype: %s is not supported currently\n",
6219 : spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
6220 : spdk_nvme_transport_id_trtype_str(trid->trtype));
6221 0 : return -EINVAL;
6222 : }
6223 :
6224 :
6225 : /* Currently we only support failover to the same NQN. */
6226 9 : if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
6227 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr,
6228 : "Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
6229 : nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
6230 0 : return -EINVAL;
6231 : }
6232 :
6233 : /* Skip all the other checks if we've already registered this path. */
6234 21 : TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
6235 12 : if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
6236 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "This path (traddr: %s subnqn: %s) is already registered\n",
6237 : trid->traddr, trid->subnqn);
6238 0 : return -EALREADY;
6239 : }
6240 : }
6241 :
6242 9 : return 0;
6243 : }
6244 :
6245 : static int
6246 9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
6247 : struct spdk_nvme_ctrlr *new_ctrlr)
6248 : {
6249 : struct nvme_ns *nvme_ns;
6250 : struct spdk_nvme_ns *new_ns;
6251 :
6252 9 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
6253 9 : while (nvme_ns != NULL) {
6254 0 : new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
6255 0 : assert(new_ns != NULL);
6256 :
6257 0 : if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
6258 0 : return -EINVAL;
6259 : }
6260 :
6261 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
6262 : }
6263 :
6264 9 : return 0;
6265 : }
6266 :
6267 : static int
6268 9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
6269 : struct spdk_nvme_transport_id *trid)
6270 : {
6271 : struct nvme_path_id *active_id, *new_trid, *tmp_trid;
6272 :
6273 9 : new_trid = calloc(1, sizeof(*new_trid));
6274 9 : if (new_trid == NULL) {
6275 0 : return -ENOMEM;
6276 : }
6277 9 : new_trid->trid = *trid;
6278 :
6279 9 : active_id = nvme_ctrlr->active_path_id;
6280 9 : assert(active_id != NULL);
6281 9 : assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
6282 :
6283 : /* Skip the active trid not to replace it until it is failed. */
6284 9 : tmp_trid = TAILQ_NEXT(active_id, link);
6285 9 : if (tmp_trid == NULL) {
6286 6 : goto add_tail;
6287 : }
6288 :
6289 : /* It means the trid is faled if its last failed time is non-zero.
6290 : * Insert the new alternate trid before any failed trid.
6291 : */
6292 5 : TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
6293 3 : if (tmp_trid->last_failed_tsc != 0) {
6294 1 : TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
6295 1 : return 0;
6296 : }
6297 : }
6298 :
6299 2 : add_tail:
6300 8 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
6301 8 : return 0;
6302 : }
6303 :
6304 : /* This is the case that a secondary path is added to an existing
6305 : * nvme_ctrlr for failover. After checking if it can access the same
6306 : * namespaces as the primary path, it is disconnected until failover occurs.
6307 : */
6308 : static int
6309 9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
6310 : struct spdk_nvme_ctrlr *new_ctrlr,
6311 : struct spdk_nvme_transport_id *trid)
6312 : {
6313 : int rc;
6314 :
6315 9 : assert(nvme_ctrlr != NULL);
6316 :
6317 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
6318 :
6319 9 : rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
6320 9 : if (rc != 0) {
6321 0 : goto exit;
6322 : }
6323 :
6324 9 : rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
6325 9 : if (rc != 0) {
6326 0 : goto exit;
6327 : }
6328 :
6329 9 : rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
6330 :
6331 9 : exit:
6332 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6333 :
6334 9 : spdk_nvme_detach(new_ctrlr);
6335 :
6336 9 : return rc;
6337 : }
6338 :
6339 : static void
6340 47 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6341 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
6342 : {
6343 47 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6344 : struct nvme_async_probe_ctx *ctx;
6345 : int rc;
6346 :
6347 47 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
6348 47 : ctx->ctrlr_attached = true;
6349 :
6350 47 : rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
6351 47 : if (rc != 0) {
6352 1 : ctx->reported_bdevs = 0;
6353 1 : populate_namespaces_cb(ctx, rc);
6354 : }
6355 47 : }
6356 :
6357 :
6358 : static void
6359 4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6360 : struct spdk_nvme_ctrlr *ctrlr,
6361 : const struct spdk_nvme_ctrlr_opts *opts)
6362 : {
6363 4 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6364 : struct nvme_ctrlr *nvme_ctrlr;
6365 : struct nvme_async_probe_ctx *ctx;
6366 : int rc;
6367 :
6368 4 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
6369 4 : ctx->ctrlr_attached = true;
6370 :
6371 4 : nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
6372 4 : if (nvme_ctrlr) {
6373 4 : rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
6374 : } else {
6375 0 : rc = -ENODEV;
6376 : }
6377 :
6378 4 : ctx->reported_bdevs = 0;
6379 4 : populate_namespaces_cb(ctx, rc);
6380 4 : }
6381 :
6382 : static int
6383 52 : bdev_nvme_async_poll(void *arg)
6384 : {
6385 52 : struct nvme_async_probe_ctx *ctx = arg;
6386 : int rc;
6387 :
6388 52 : rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
6389 52 : if (spdk_unlikely(rc != -EAGAIN)) {
6390 52 : ctx->probe_done = true;
6391 52 : spdk_poller_unregister(&ctx->poller);
6392 52 : if (!ctx->ctrlr_attached) {
6393 : /* The probe is done, but no controller was attached.
6394 : * That means we had a failure, so report -EIO back to
6395 : * the caller (usually the RPC). populate_namespaces_cb()
6396 : * will take care of freeing the nvme_async_probe_ctx.
6397 : */
6398 1 : ctx->reported_bdevs = 0;
6399 1 : populate_namespaces_cb(ctx, -EIO);
6400 51 : } else if (ctx->namespaces_populated) {
6401 : /* The namespaces for the attached controller were all
6402 : * populated and the response was already sent to the
6403 : * caller (usually the RPC). So free the context here.
6404 : */
6405 21 : free_nvme_async_probe_ctx(ctx);
6406 : }
6407 : }
6408 :
6409 52 : return SPDK_POLLER_BUSY;
6410 : }
6411 :
/*
 * Validate the combination of the three I/O error resiliency parameters.
 *
 * Accepted combinations:
 *  - ctrlr_loss_timeout_sec == 0: both other parameters must be 0.
 *  - ctrlr_loss_timeout_sec == -1: reconnect_delay_sec must be non-zero, and
 *    fast_io_fail_timeout_sec must be 0 or >= reconnect_delay_sec.
 *  - ctrlr_loss_timeout_sec > 0: reconnect_delay_sec must be in
 *    [1, ctrlr_loss_timeout_sec], and fast_io_fail_timeout_sec must be 0 or
 *    in [reconnect_delay_sec, ctrlr_loss_timeout_sec].
 * Values of ctrlr_loss_timeout_sec below -1 are always rejected.
 *
 * Returns true if the combination is valid; otherwise logs the reason and
 * returns false.
 */
static bool
bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
		uint32_t reconnect_delay_sec,
		uint32_t fast_io_fail_timeout_sec)
{
	if (ctrlr_loss_timeout_sec < -1) {
		SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
		return false;
	}

	if (ctrlr_loss_timeout_sec == 0) {
		if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
			SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
			return false;
		}
		return true;
	}

	/* From here on ctrlr_loss_timeout_sec is either -1 or positive. */
	if (reconnect_delay_sec == 0) {
		SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
		return false;
	}

	if (ctrlr_loss_timeout_sec == -1) {
		if (fast_io_fail_timeout_sec != 0 &&
		    fast_io_fail_timeout_sec < reconnect_delay_sec) {
			SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
			return false;
		}
		return true;
	}

	/* ctrlr_loss_timeout_sec is positive, so the casts below are lossless. */
	if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
		SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
		return false;
	}

	if (fast_io_fail_timeout_sec != 0) {
		if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
			SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
			return false;
		}
		if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
			SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
			return false;
		}
	}

	return true;
}
6452 :
6453 : int
6454 52 : spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
6455 : const char *base_name,
6456 : const char **names,
6457 : uint32_t count,
6458 : spdk_bdev_nvme_create_cb cb_fn,
6459 : void *cb_ctx,
6460 : struct spdk_nvme_ctrlr_opts *drv_opts,
6461 : struct spdk_bdev_nvme_ctrlr_opts *bdev_opts)
6462 : {
6463 : struct nvme_probe_skip_entry *entry, *tmp;
6464 : struct nvme_async_probe_ctx *ctx;
6465 : spdk_nvme_attach_cb attach_cb;
6466 : struct nvme_ctrlr *nvme_ctrlr;
6467 : int len;
6468 :
6469 : /* TODO expand this check to include both the host and target TRIDs.
6470 : * Only if both are the same should we fail.
6471 : */
6472 52 : if (nvme_ctrlr_get(trid, drv_opts->hostnqn) != NULL) {
6473 0 : SPDK_ERRLOG("A controller with the provided trid (traddr: %s, hostnqn: %s) "
6474 : "already exists.\n", trid->traddr, drv_opts->hostnqn);
6475 0 : return -EEXIST;
6476 : }
6477 :
6478 52 : len = strnlen(base_name, SPDK_CONTROLLER_NAME_MAX);
6479 :
6480 52 : if (len == 0 || len == SPDK_CONTROLLER_NAME_MAX) {
6481 0 : SPDK_ERRLOG("controller name must be between 1 and %d characters\n", SPDK_CONTROLLER_NAME_MAX - 1);
6482 0 : return -EINVAL;
6483 : }
6484 :
6485 52 : if (bdev_opts != NULL &&
6486 52 : !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
6487 : bdev_opts->reconnect_delay_sec,
6488 : bdev_opts->fast_io_fail_timeout_sec)) {
6489 0 : return -EINVAL;
6490 : }
6491 :
6492 52 : ctx = calloc(1, sizeof(*ctx));
6493 52 : if (!ctx) {
6494 0 : return -ENOMEM;
6495 : }
6496 52 : ctx->base_name = base_name;
6497 52 : ctx->names = names;
6498 52 : ctx->max_bdevs = count;
6499 52 : ctx->cb_fn = cb_fn;
6500 52 : ctx->cb_ctx = cb_ctx;
6501 52 : ctx->trid = *trid;
6502 :
6503 52 : if (bdev_opts) {
6504 52 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
6505 : } else {
6506 0 : spdk_bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
6507 : }
6508 :
6509 52 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
6510 0 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
6511 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
6512 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
6513 0 : free(entry);
6514 0 : break;
6515 : }
6516 : }
6517 : }
6518 :
6519 52 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
6520 52 : ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
6521 52 : ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
6522 52 : ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
6523 52 : ctx->drv_opts.disable_read_ana_log_page = true;
6524 52 : ctx->drv_opts.transport_tos = g_opts.transport_tos;
6525 :
6526 52 : if (ctx->bdev_opts.psk != NULL) {
6527 0 : ctx->drv_opts.tls_psk = spdk_keyring_get_key(ctx->bdev_opts.psk);
6528 0 : if (ctx->drv_opts.tls_psk == NULL) {
6529 0 : SPDK_ERRLOG("Could not load PSK: %s\n", ctx->bdev_opts.psk);
6530 0 : free_nvme_async_probe_ctx(ctx);
6531 0 : return -ENOKEY;
6532 : }
6533 : }
6534 :
6535 52 : if (ctx->bdev_opts.dhchap_key != NULL) {
6536 0 : ctx->drv_opts.dhchap_key = spdk_keyring_get_key(ctx->bdev_opts.dhchap_key);
6537 0 : if (ctx->drv_opts.dhchap_key == NULL) {
6538 0 : SPDK_ERRLOG("Could not load DH-HMAC-CHAP key: %s\n",
6539 : ctx->bdev_opts.dhchap_key);
6540 0 : free_nvme_async_probe_ctx(ctx);
6541 0 : return -ENOKEY;
6542 : }
6543 :
6544 0 : ctx->drv_opts.dhchap_digests = g_opts.dhchap_digests;
6545 0 : ctx->drv_opts.dhchap_dhgroups = g_opts.dhchap_dhgroups;
6546 : }
6547 52 : if (ctx->bdev_opts.dhchap_ctrlr_key != NULL) {
6548 0 : ctx->drv_opts.dhchap_ctrlr_key =
6549 0 : spdk_keyring_get_key(ctx->bdev_opts.dhchap_ctrlr_key);
6550 0 : if (ctx->drv_opts.dhchap_ctrlr_key == NULL) {
6551 0 : SPDK_ERRLOG("Could not load DH-HMAC-CHAP controller key: %s\n",
6552 : ctx->bdev_opts.dhchap_ctrlr_key);
6553 0 : free_nvme_async_probe_ctx(ctx);
6554 0 : return -ENOKEY;
6555 : }
6556 : }
6557 :
6558 52 : if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || ctx->bdev_opts.multipath) {
6559 48 : attach_cb = connect_attach_cb;
6560 : } else {
6561 4 : attach_cb = connect_set_failover_cb;
6562 : }
6563 :
6564 52 : nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
6565 52 : if (nvme_ctrlr && nvme_ctrlr->opts.multipath != ctx->bdev_opts.multipath) {
6566 : /* All controllers with the same name must be configured the same
6567 : * way, either for multipath or failover. If the configuration doesn't
6568 : * match - report error.
6569 : */
6570 0 : free_nvme_async_probe_ctx(ctx);
6571 0 : return -EINVAL;
6572 : }
6573 :
6574 52 : ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
6575 52 : if (ctx->probe_ctx == NULL) {
6576 0 : SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
6577 0 : free_nvme_async_probe_ctx(ctx);
6578 0 : return -ENODEV;
6579 : }
6580 52 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
6581 :
6582 52 : return 0;
6583 : }
6584 :
6585 : struct bdev_nvme_delete_ctx {
6586 : char *name;
6587 : struct nvme_path_id path_id;
6588 : bdev_nvme_delete_done_fn delete_done;
6589 : void *delete_done_ctx;
6590 : uint64_t timeout_ticks;
6591 : struct spdk_poller *poller;
6592 : };
6593 :
6594 : static void
6595 2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
6596 : {
6597 2 : if (ctx != NULL) {
6598 1 : free(ctx->name);
6599 1 : free(ctx);
6600 : }
6601 2 : }
6602 :
6603 : static bool
6604 75 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
6605 : {
6606 75 : if (path_id->trid.trtype != 0) {
6607 21 : if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
6608 0 : if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
6609 0 : return false;
6610 : }
6611 : } else {
6612 21 : if (path_id->trid.trtype != p->trid.trtype) {
6613 0 : return false;
6614 : }
6615 : }
6616 : }
6617 :
6618 75 : if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
6619 21 : if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
6620 11 : return false;
6621 : }
6622 : }
6623 :
6624 64 : if (path_id->trid.adrfam != 0) {
6625 0 : if (path_id->trid.adrfam != p->trid.adrfam) {
6626 0 : return false;
6627 : }
6628 : }
6629 :
6630 64 : if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
6631 10 : if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
6632 0 : return false;
6633 : }
6634 : }
6635 :
6636 64 : if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
6637 10 : if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
6638 0 : return false;
6639 : }
6640 : }
6641 :
6642 64 : if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
6643 0 : if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
6644 0 : return false;
6645 : }
6646 : }
6647 :
6648 64 : if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
6649 0 : if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
6650 0 : return false;
6651 : }
6652 : }
6653 :
6654 64 : return true;
6655 : }
6656 :
6657 : static bool
6658 2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
6659 : {
6660 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6661 : struct nvme_ctrlr *ctrlr;
6662 : struct nvme_path_id *p;
6663 :
6664 2 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6665 2 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6666 2 : if (!nbdev_ctrlr) {
6667 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6668 1 : return false;
6669 : }
6670 :
6671 1 : TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
6672 1 : pthread_mutex_lock(&ctrlr->mutex);
6673 1 : TAILQ_FOREACH(p, &ctrlr->trids, link) {
6674 1 : if (nvme_path_id_compare(p, path_id)) {
6675 1 : pthread_mutex_unlock(&ctrlr->mutex);
6676 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6677 1 : return true;
6678 : }
6679 : }
6680 0 : pthread_mutex_unlock(&ctrlr->mutex);
6681 : }
6682 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6683 :
6684 0 : return false;
6685 : }
6686 :
6687 : static int
6688 2 : bdev_nvme_delete_complete_poll(void *arg)
6689 : {
6690 2 : struct bdev_nvme_delete_ctx *ctx = arg;
6691 2 : int rc = 0;
6692 :
6693 2 : if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
6694 1 : if (ctx->timeout_ticks > spdk_get_ticks()) {
6695 1 : return SPDK_POLLER_BUSY;
6696 : }
6697 :
6698 0 : SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
6699 0 : rc = -ETIMEDOUT;
6700 : }
6701 :
6702 1 : spdk_poller_unregister(&ctx->poller);
6703 :
6704 1 : ctx->delete_done(ctx->delete_done_ctx, rc);
6705 1 : free_bdev_nvme_delete_ctx(ctx);
6706 :
6707 1 : return SPDK_POLLER_BUSY;
6708 : }
6709 :
6710 : static int
6711 64 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
6712 : {
6713 : struct nvme_path_id *p, *t;
6714 : spdk_msg_fn msg_fn;
6715 64 : int rc = -ENXIO;
6716 :
6717 64 : pthread_mutex_lock(&nvme_ctrlr->mutex);
6718 :
6719 74 : TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
6720 74 : if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
6721 64 : break;
6722 : }
6723 :
6724 10 : if (!nvme_path_id_compare(p, path_id)) {
6725 3 : continue;
6726 : }
6727 :
6728 : /* We are not using the specified path. */
6729 7 : TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
6730 7 : free(p);
6731 7 : rc = 0;
6732 : }
6733 :
6734 64 : if (p == NULL || !nvme_path_id_compare(p, path_id)) {
6735 8 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6736 8 : return rc;
6737 : }
6738 :
6739 : /* If we made it here, then this path is a match! Now we need to remove it. */
6740 :
6741 : /* This is the active path in use right now. The active path is always the first in the list. */
6742 56 : assert(p == nvme_ctrlr->active_path_id);
6743 :
6744 56 : if (!TAILQ_NEXT(p, link)) {
6745 : /* The current path is the only path. */
6746 55 : msg_fn = _nvme_ctrlr_destruct;
6747 55 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
6748 : } else {
6749 : /* There is an alternative path. */
6750 1 : msg_fn = _bdev_nvme_reset_ctrlr;
6751 1 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
6752 : }
6753 :
6754 56 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6755 :
6756 56 : if (rc == 0) {
6757 56 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
6758 0 : } else if (rc == -EALREADY) {
6759 0 : rc = 0;
6760 : }
6761 :
6762 56 : return rc;
6763 : }
6764 :
6765 : int
6766 49 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
6767 : bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
6768 : {
6769 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6770 : struct nvme_ctrlr *nvme_ctrlr, *tmp_nvme_ctrlr;
6771 49 : struct bdev_nvme_delete_ctx *ctx = NULL;
6772 49 : int rc = -ENXIO, _rc;
6773 :
6774 49 : if (name == NULL || path_id == NULL) {
6775 0 : rc = -EINVAL;
6776 0 : goto exit;
6777 : }
6778 :
6779 49 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6780 :
6781 49 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6782 49 : if (nbdev_ctrlr == NULL) {
6783 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6784 :
6785 0 : SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
6786 0 : rc = -ENODEV;
6787 0 : goto exit;
6788 : }
6789 :
6790 113 : TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
6791 64 : _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
6792 64 : if (_rc < 0 && _rc != -ENXIO) {
6793 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6794 0 : rc = _rc;
6795 0 : goto exit;
6796 64 : } else if (_rc == 0) {
6797 : /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
6798 : * was deleted successfully. To remember the successful deletion,
6799 : * overwrite rc only if _rc is zero.
6800 : */
6801 58 : rc = 0;
6802 : }
6803 : }
6804 :
6805 49 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6806 :
6807 49 : if (rc != 0 || delete_done == NULL) {
6808 48 : goto exit;
6809 : }
6810 :
6811 1 : ctx = calloc(1, sizeof(*ctx));
6812 1 : if (ctx == NULL) {
6813 0 : SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
6814 0 : rc = -ENOMEM;
6815 0 : goto exit;
6816 : }
6817 :
6818 1 : ctx->name = strdup(name);
6819 1 : if (ctx->name == NULL) {
6820 0 : SPDK_ERRLOG("Failed to copy controller name for deletion\n");
6821 0 : rc = -ENOMEM;
6822 0 : goto exit;
6823 : }
6824 :
6825 1 : ctx->delete_done = delete_done;
6826 1 : ctx->delete_done_ctx = delete_done_ctx;
6827 1 : ctx->path_id = *path_id;
6828 1 : ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
6829 1 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
6830 1 : if (ctx->poller == NULL) {
6831 0 : SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
6832 0 : rc = -ENOMEM;
6833 0 : goto exit;
6834 : }
6835 :
6836 1 : exit:
6837 49 : if (rc != 0) {
6838 1 : free_bdev_nvme_delete_ctx(ctx);
6839 : }
6840 :
6841 49 : return rc;
6842 : }
6843 :
/*
 * Logging helpers for the discovery service. Each message is prefixed with the
 * traddr/trsvcid of the discovery context's transport ID.
 *
 * The ctx argument is parenthesized so that any pointer expression can be
 * passed safely. The trailing semicolon is kept on purpose for compatibility
 * with existing call sites; always use these macros inside braces.
 */
#define DISCOVERY_INFOLOG(ctx, format, ...) \
	SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, (ctx)->trid.traddr, (ctx)->trid.trsvcid, ##__VA_ARGS__);

#define DISCOVERY_ERRLOG(ctx, format, ...) \
	SPDK_ERRLOG("Discovery[%s:%s] " format, (ctx)->trid.traddr, (ctx)->trid.trsvcid, ##__VA_ARGS__);
6849 :
/*
 * One entry learned from (or used to reach) a discovery service. Instances
 * live either on discovery_ctx::nvm_entry_ctxs (NVM subsystems attached via
 * spdk_bdev_nvme_create()) or on discovery_ctx::discovery_entry_ctxs
 * (discovery controllers we may connect to).
 */
struct discovery_entry_ctx {
	/* Controller name passed to spdk_bdev_nvme_create() / bdev_nvme_delete(). */
	char						name[128];
	/* Transport ID of the entry, with NUL-terminated address fields. */
	struct spdk_nvme_transport_id			trid;
	/* Driver options: defaults with hostnqn taken from the owning discovery_ctx. */
	struct spdk_nvme_ctrlr_opts			drv_opts;
	/* Raw copy of the log page entry; compared with memcmp() against new log pages. */
	struct spdk_nvmf_discovery_log_page_entry	entry;
	/* List linkage. */
	TAILQ_ENTRY(discovery_entry_ctx)		tailq;
	/* Owning discovery context. */
	struct discovery_ctx				*ctx;
};
6858 :
/* State of one running discovery service instance. */
struct discovery_ctx {
	/* Base name of the service (heap copy); generated NVM controller names
	 * are "<name><index>".
	 */
	char					*name;
	/* Called once from complete_discovery_start(), then cleared. */
	spdk_bdev_nvme_start_discovery_fn	start_cb_fn;
	/* Called by discovery_poller() once the service has been stopped. */
	spdk_bdev_nvme_stop_discovery_fn	stop_cb_fn;
	/* Argument handed to start_cb_fn / stop_cb_fn. */
	void					*cb_ctx;
	/* Non-NULL while an asynchronous connect to the discovery ctrlr is pending. */
	struct spdk_nvme_probe_ctx		*probe_ctx;
	/* Non-NULL while an asynchronous detach of the discovery ctrlr is pending. */
	struct spdk_nvme_detach_ctx		*detach_ctx;
	/* Currently attached discovery controller, or NULL. */
	struct spdk_nvme_ctrlr			*ctrlr;
	/* Transport ID supplied when the service was started (used in log prefixes). */
	struct spdk_nvme_transport_id		trid;
	/* Discovery entry currently used for the connection; taken off
	 * discovery_entry_ctxs while in use.
	 */
	struct discovery_entry_ctx		*entry_ctx_in_use;
	/* Poller running discovery_poller(). */
	struct spdk_poller			*poller;
	/* Copies of the options supplied to bdev_nvme_start_discovery(). */
	struct spdk_nvme_ctrlr_opts		drv_opts;
	struct spdk_bdev_nvme_ctrlr_opts	bdev_opts;
	/* Most recently received log page; freed in discovery_remove_controllers(). */
	struct spdk_nvmf_discovery_log_page	*log_page;
	/* Linkage on g_discovery_ctxs. */
	TAILQ_ENTRY(discovery_ctx)		tailq;
	/* NVM subsystem entries that were (or are being) attached. */
	TAILQ_HEAD(, discovery_entry_ctx)	nvm_entry_ctxs;
	/* Discovery controller entries available for (re)connecting. */
	TAILQ_HEAD(, discovery_entry_ctx)	discovery_entry_ctxs;
	/* Status recorded by complete_discovery_start(). */
	int					rc;
	/* Set when a start callback was supplied. */
	bool					wait_for_attach;
	/* Attach deadline in ticks; 0 means no timeout is armed. */
	uint64_t				timeout_ticks;
	/* Denotes that the discovery service is being started. We're waiting
	 * for the initial connection to the discovery controller to be
	 * established and attach discovered NVM ctrlrs.
	 */
	bool					initializing;
	/* Denotes if a discovery is currently in progress for this context.
	 * That includes connecting to newly discovered subsystems. Used to
	 * ensure we do not start a new discovery until an existing one is
	 * complete.
	 */
	bool					in_progress;

	/* Denotes if another discovery is needed after the one in progress
	 * completes. Set when we receive an AER completion while a discovery
	 * is already in progress.
	 */
	bool					pending;

	/* Signal to the discovery context poller that it should stop the
	 * discovery service, including detaching from the current discovery
	 * controller.
	 */
	bool					stop;

	/* Thread that called bdev_nvme_start_discovery(). */
	struct spdk_thread			*calling_thread;
	/* Counter used to generate unique NVM controller names. */
	uint32_t				index;
	/* Number of spdk_bdev_nvme_create() attaches that have not completed yet. */
	uint32_t				attach_in_progress;
	/* Heap copy of drv_opts.hostnqn (may be an empty string). */
	char					*hostnqn;

	/* Denotes if the discovery service was started by the mdns discovery.
	 */
	bool					from_mdns_discovery_service;
};
6912 :
/* List of all running discovery services. */
TAILQ_HEAD(discovery_ctxs, discovery_ctx);
static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);

/* Forward declaration: discovery_complete() re-issues the log page request. */
static void get_discovery_log_page(struct discovery_ctx *ctx);
6917 :
6918 : static void
6919 0 : free_discovery_ctx(struct discovery_ctx *ctx)
6920 : {
6921 0 : free(ctx->log_page);
6922 0 : free(ctx->hostnqn);
6923 0 : free(ctx->name);
6924 0 : free(ctx);
6925 0 : }
6926 :
6927 : static void
6928 0 : discovery_complete(struct discovery_ctx *ctx)
6929 : {
6930 0 : ctx->initializing = false;
6931 0 : ctx->in_progress = false;
6932 0 : if (ctx->pending) {
6933 0 : ctx->pending = false;
6934 0 : get_discovery_log_page(ctx);
6935 : }
6936 0 : }
6937 :
6938 : static void
6939 0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
6940 : struct spdk_nvmf_discovery_log_page_entry *entry)
6941 : {
6942 : char *space;
6943 :
6944 0 : trid->trtype = entry->trtype;
6945 0 : trid->adrfam = entry->adrfam;
6946 0 : memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
6947 0 : memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
6948 : /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
6949 : * before call to this function trid->subnqn is zeroed out, we need
6950 : * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
6951 : * remains 0. Then we can shorten the string (replace ' ' with 0) if required
6952 : */
6953 0 : memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
6954 :
6955 : /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
6956 : * But the log page entries typically pad them with spaces, not zeroes.
6957 : * So add a NULL terminator to each of these fields at the appropriate
6958 : * location.
6959 : */
6960 0 : space = strchr(trid->traddr, ' ');
6961 0 : if (space) {
6962 0 : *space = 0;
6963 : }
6964 0 : space = strchr(trid->trsvcid, ' ');
6965 0 : if (space) {
6966 0 : *space = 0;
6967 : }
6968 0 : space = strchr(trid->subnqn, ' ');
6969 0 : if (space) {
6970 0 : *space = 0;
6971 : }
6972 0 : }
6973 :
6974 : static void
6975 0 : _stop_discovery(void *_ctx)
6976 : {
6977 0 : struct discovery_ctx *ctx = _ctx;
6978 :
6979 0 : if (ctx->attach_in_progress > 0) {
6980 0 : spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
6981 0 : return;
6982 : }
6983 :
6984 0 : ctx->stop = true;
6985 :
6986 0 : while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
6987 : struct discovery_entry_ctx *entry_ctx;
6988 0 : struct nvme_path_id path = {};
6989 :
6990 0 : entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
6991 0 : path.trid = entry_ctx->trid;
6992 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
6993 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
6994 0 : free(entry_ctx);
6995 : }
6996 :
6997 0 : while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
6998 : struct discovery_entry_ctx *entry_ctx;
6999 :
7000 0 : entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
7001 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
7002 0 : free(entry_ctx);
7003 : }
7004 :
7005 0 : free(ctx->entry_ctx_in_use);
7006 0 : ctx->entry_ctx_in_use = NULL;
7007 : }
7008 :
7009 : static void
7010 0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
7011 : {
7012 0 : ctx->stop_cb_fn = cb_fn;
7013 0 : ctx->cb_ctx = cb_ctx;
7014 :
7015 0 : if (ctx->attach_in_progress > 0) {
7016 0 : DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
7017 : ctx->attach_in_progress);
7018 : }
7019 :
7020 0 : _stop_discovery(ctx);
7021 0 : }
7022 :
7023 : static void
7024 2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
7025 : {
7026 : struct discovery_ctx *d_ctx;
7027 : struct nvme_path_id *path_id;
7028 2 : struct spdk_nvme_transport_id trid = {};
7029 : struct discovery_entry_ctx *entry_ctx, *tmp;
7030 :
7031 2 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
7032 :
7033 2 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
7034 0 : TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
7035 0 : build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
7036 0 : if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
7037 0 : continue;
7038 : }
7039 :
7040 0 : TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
7041 0 : free(entry_ctx);
7042 0 : DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
7043 : trid.subnqn, trid.traddr, trid.trsvcid);
7044 :
7045 : /* Fail discovery ctrlr to force reattach attempt */
7046 0 : spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
7047 : }
7048 : }
7049 2 : }
7050 :
7051 : static void
7052 0 : discovery_remove_controllers(struct discovery_ctx *ctx)
7053 : {
7054 0 : struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
7055 : struct discovery_entry_ctx *entry_ctx, *tmp;
7056 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
7057 0 : struct spdk_nvme_transport_id old_trid = {};
7058 : uint64_t numrec, i;
7059 : bool found;
7060 :
7061 0 : numrec = from_le64(&log_page->numrec);
7062 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
7063 0 : found = false;
7064 0 : old_entry = &entry_ctx->entry;
7065 0 : build_trid_from_log_page_entry(&old_trid, old_entry);
7066 0 : for (i = 0; i < numrec; i++) {
7067 0 : new_entry = &log_page->entries[i];
7068 0 : if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
7069 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
7070 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
7071 0 : found = true;
7072 0 : break;
7073 : }
7074 : }
7075 0 : if (!found) {
7076 0 : struct nvme_path_id path = {};
7077 :
7078 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
7079 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
7080 :
7081 0 : path.trid = entry_ctx->trid;
7082 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
7083 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
7084 0 : free(entry_ctx);
7085 : }
7086 : }
7087 0 : free(log_page);
7088 0 : ctx->log_page = NULL;
7089 0 : discovery_complete(ctx);
7090 0 : }
7091 :
7092 : static void
7093 0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
7094 : {
7095 0 : ctx->timeout_ticks = 0;
7096 0 : ctx->rc = status;
7097 0 : if (ctx->start_cb_fn) {
7098 0 : ctx->start_cb_fn(ctx->cb_ctx, status);
7099 0 : ctx->start_cb_fn = NULL;
7100 0 : ctx->cb_ctx = NULL;
7101 : }
7102 0 : }
7103 :
7104 : static void
7105 0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
7106 : {
7107 0 : struct discovery_entry_ctx *entry_ctx = cb_ctx;
7108 0 : struct discovery_ctx *ctx = entry_ctx->ctx;
7109 :
7110 0 : DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
7111 0 : ctx->attach_in_progress--;
7112 0 : if (ctx->attach_in_progress == 0) {
7113 0 : complete_discovery_start(ctx, ctx->rc);
7114 0 : if (ctx->initializing && ctx->rc != 0) {
7115 0 : DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
7116 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
7117 : } else {
7118 0 : discovery_remove_controllers(ctx);
7119 : }
7120 : }
7121 0 : }
7122 :
7123 : static struct discovery_entry_ctx *
7124 0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
7125 : {
7126 : struct discovery_entry_ctx *new_ctx;
7127 :
7128 0 : new_ctx = calloc(1, sizeof(*new_ctx));
7129 0 : if (new_ctx == NULL) {
7130 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7131 0 : return NULL;
7132 : }
7133 :
7134 0 : new_ctx->ctx = ctx;
7135 0 : memcpy(&new_ctx->trid, trid, sizeof(*trid));
7136 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
7137 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
7138 0 : return new_ctx;
7139 : }
7140 :
7141 : static void
7142 0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
7143 : struct spdk_nvmf_discovery_log_page *log_page)
7144 : {
7145 0 : struct discovery_ctx *ctx = cb_arg;
7146 : struct discovery_entry_ctx *entry_ctx, *tmp;
7147 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
7148 : uint64_t numrec, i;
7149 : bool found;
7150 :
7151 0 : if (rc || spdk_nvme_cpl_is_error(cpl)) {
7152 0 : DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
7153 0 : return;
7154 : }
7155 :
7156 0 : ctx->log_page = log_page;
7157 0 : assert(ctx->attach_in_progress == 0);
7158 0 : numrec = from_le64(&log_page->numrec);
7159 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
7160 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
7161 0 : free(entry_ctx);
7162 : }
7163 0 : for (i = 0; i < numrec; i++) {
7164 0 : found = false;
7165 0 : new_entry = &log_page->entries[i];
7166 0 : if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
7167 0 : new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
7168 : struct discovery_entry_ctx *new_ctx;
7169 0 : struct spdk_nvme_transport_id trid = {};
7170 :
7171 0 : build_trid_from_log_page_entry(&trid, new_entry);
7172 0 : new_ctx = create_discovery_entry_ctx(ctx, &trid);
7173 0 : if (new_ctx == NULL) {
7174 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7175 0 : break;
7176 : }
7177 :
7178 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
7179 0 : continue;
7180 : }
7181 0 : TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
7182 0 : old_entry = &entry_ctx->entry;
7183 0 : if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
7184 0 : found = true;
7185 0 : break;
7186 : }
7187 : }
7188 0 : if (!found) {
7189 0 : struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
7190 : struct discovery_ctx *d_ctx;
7191 :
7192 0 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
7193 0 : TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
7194 0 : if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
7195 : sizeof(new_entry->subnqn))) {
7196 0 : break;
7197 : }
7198 : }
7199 0 : if (subnqn_ctx) {
7200 0 : break;
7201 : }
7202 : }
7203 :
7204 0 : new_ctx = calloc(1, sizeof(*new_ctx));
7205 0 : if (new_ctx == NULL) {
7206 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7207 0 : break;
7208 : }
7209 :
7210 0 : new_ctx->ctx = ctx;
7211 0 : memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
7212 0 : build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
7213 0 : if (subnqn_ctx) {
7214 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
7215 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
7216 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
7217 : new_ctx->name);
7218 : } else {
7219 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
7220 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
7221 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
7222 : new_ctx->name);
7223 : }
7224 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
7225 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
7226 0 : rc = spdk_bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
7227 : discovery_attach_controller_done, new_ctx,
7228 : &new_ctx->drv_opts, &ctx->bdev_opts);
7229 0 : if (rc == 0) {
7230 0 : TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
7231 0 : ctx->attach_in_progress++;
7232 : } else {
7233 0 : DISCOVERY_ERRLOG(ctx, "spdk_bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
7234 : }
7235 : }
7236 : }
7237 :
7238 0 : if (ctx->attach_in_progress == 0) {
7239 0 : discovery_remove_controllers(ctx);
7240 : }
7241 : }
7242 :
7243 : static void
7244 0 : get_discovery_log_page(struct discovery_ctx *ctx)
7245 : {
7246 : int rc;
7247 :
7248 0 : assert(ctx->in_progress == false);
7249 0 : ctx->in_progress = true;
7250 0 : rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
7251 0 : if (rc != 0) {
7252 0 : DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
7253 : }
7254 0 : DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
7255 0 : }
7256 :
7257 : static void
7258 0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
7259 : {
7260 0 : struct discovery_ctx *ctx = arg;
7261 0 : uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
7262 :
7263 0 : if (spdk_nvme_cpl_is_error(cpl)) {
7264 0 : DISCOVERY_ERRLOG(ctx, "aer failed\n");
7265 0 : return;
7266 : }
7267 :
7268 0 : if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
7269 0 : DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
7270 0 : return;
7271 : }
7272 :
7273 0 : DISCOVERY_INFOLOG(ctx, "got aer\n");
7274 0 : if (ctx->in_progress) {
7275 0 : ctx->pending = true;
7276 0 : return;
7277 : }
7278 :
7279 0 : get_discovery_log_page(ctx);
7280 : }
7281 :
7282 : static void
7283 0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
7284 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
7285 : {
7286 0 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
7287 : struct discovery_ctx *ctx;
7288 :
7289 0 : ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
7290 :
7291 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
7292 0 : ctx->probe_ctx = NULL;
7293 0 : ctx->ctrlr = ctrlr;
7294 :
7295 0 : if (ctx->rc != 0) {
7296 0 : DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
7297 : ctx->rc);
7298 0 : return;
7299 : }
7300 :
7301 0 : spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
7302 : }
7303 :
/*
 * Poller driving one discovery service. Each invocation handles exactly one
 * of the following states, checked in this order:
 *
 *  1. a detach of the discovery ctrlr is pending   -> poll the detach;
 *  2. a stop was requested                         -> detach the ctrlr (if any),
 *     then unregister, unlink, call stop_cb_fn and free the context;
 *  3. not connected and no connect pending         -> start connecting using the
 *     first entry of discovery_entry_ctxs (or give up on attach timeout);
 *  4. a connect is pending                         -> poll the probe and, once
 *     connected, request the discovery log page;
 *  5. connected                                    -> process admin completions
 *     and, if that fails, detach so that state 3 reconnects.
 *
 * Always returns SPDK_POLLER_BUSY.
 */
static int
discovery_poller(void *arg)
{
	struct discovery_ctx *ctx = arg;
	struct spdk_nvme_transport_id *trid;
	int rc;

	if (ctx->detach_ctx) {
		rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
		if (rc != -EAGAIN) {
			/* Detach finished: forget the ctrlr so that we reconnect (or stop). */
			ctx->detach_ctx = NULL;
			ctx->ctrlr = NULL;
		}
	} else if (ctx->stop) {
		if (ctx->ctrlr != NULL) {
			rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
			if (rc == 0) {
				/* The detach is polled by the first branch on later invocations. */
				return SPDK_POLLER_BUSY;
			}
			DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
		}
		spdk_poller_unregister(&ctx->poller);
		TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
		assert(ctx->start_cb_fn == NULL);
		if (ctx->stop_cb_fn != NULL) {
			ctx->stop_cb_fn(ctx->cb_ctx);
		}
		free_discovery_ctx(ctx);
	} else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
		if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
			DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
			assert(ctx->initializing);
			spdk_poller_unregister(&ctx->poller);
			TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
			complete_discovery_start(ctx, -ETIMEDOUT);
			/* NOTE(review): ctx is freed right after stop_discovery(); this
			 * presumably relies on attach_in_progress being 0 here, otherwise
			 * _stop_discovery() re-queues itself with ctx - confirm.
			 */
			stop_discovery(ctx, NULL, NULL);
			free_discovery_ctx(ctx);
			return SPDK_POLLER_BUSY;
		}

		assert(ctx->entry_ctx_in_use == NULL);
		/* NOTE(review): assumes discovery_entry_ctxs is never empty here. */
		ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
		TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
		trid = &ctx->entry_ctx_in_use->trid;

		/* All controllers must be configured explicitly either for multipath or failover.
		 * While discovery use multipath mode, we need to set this in bdev options as well.
		 */
		ctx->bdev_opts.multipath = true;

		ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
		if (ctx->probe_ctx) {
			/* Re-register with a period of 1000 while connecting. */
			spdk_poller_unregister(&ctx->poller);
			ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
		} else {
			DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
			/* Put the entry back at the tail so other entries are tried first. */
			TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
			ctx->entry_ctx_in_use = NULL;
		}
	} else if (ctx->probe_ctx) {
		if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
			DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
			/* Records -ETIMEDOUT in ctx->rc and clears timeout_ticks; the probe
			 * keeps being polled and the service is stopped once it finishes.
			 */
			complete_discovery_start(ctx, -ETIMEDOUT);
			return SPDK_POLLER_BUSY;
		}

		rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
		if (rc != -EAGAIN) {
			if (ctx->rc != 0) {
				assert(ctx->initializing);
				stop_discovery(ctx, NULL, ctx->cb_ctx);
			} else {
				assert(rc == 0);
				DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
				ctx->rc = rc;
				get_discovery_log_page(ctx);
			}
		}
	} else {
		if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
			DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
			complete_discovery_start(ctx, -ETIMEDOUT);
			/* We need to wait until all NVM ctrlrs are attached before we stop the
			 * discovery service to make sure we don't detach a ctrlr that is still
			 * being attached.
			 */
			if (ctx->attach_in_progress == 0) {
				stop_discovery(ctx, NULL, ctx->cb_ctx);
				return SPDK_POLLER_BUSY;
			}
		}

		rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
		if (rc < 0) {
			/* Admin completion processing failed: re-register with a period of
			 * 1000 * 1000, return the entry in use to the list and detach.
			 */
			spdk_poller_unregister(&ctx->poller);
			ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
			TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
			ctx->entry_ctx_in_use = NULL;

			rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
			if (rc != 0) {
				DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
				ctx->ctrlr = NULL;
			}
		}
	}

	return SPDK_POLLER_BUSY;
}
7413 :
7414 : static void
7415 0 : start_discovery_poller(void *arg)
7416 : {
7417 0 : struct discovery_ctx *ctx = arg;
7418 :
7419 0 : TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
7420 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
7421 0 : }
7422 :
7423 : int
7424 0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
7425 : const char *base_name,
7426 : struct spdk_nvme_ctrlr_opts *drv_opts,
7427 : struct spdk_bdev_nvme_ctrlr_opts *bdev_opts,
7428 : uint64_t attach_timeout,
7429 : bool from_mdns,
7430 : spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
7431 : {
7432 : struct discovery_ctx *ctx;
7433 : struct discovery_entry_ctx *discovery_entry_ctx;
7434 :
7435 0 : snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
7436 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7437 0 : if (strcmp(ctx->name, base_name) == 0) {
7438 0 : return -EEXIST;
7439 : }
7440 :
7441 0 : if (ctx->entry_ctx_in_use != NULL) {
7442 0 : if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
7443 0 : return -EEXIST;
7444 : }
7445 : }
7446 :
7447 0 : TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
7448 0 : if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
7449 0 : return -EEXIST;
7450 : }
7451 : }
7452 : }
7453 :
7454 0 : ctx = calloc(1, sizeof(*ctx));
7455 0 : if (ctx == NULL) {
7456 0 : return -ENOMEM;
7457 : }
7458 :
7459 0 : ctx->name = strdup(base_name);
7460 0 : if (ctx->name == NULL) {
7461 0 : free_discovery_ctx(ctx);
7462 0 : return -ENOMEM;
7463 : }
7464 0 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
7465 0 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
7466 0 : ctx->from_mdns_discovery_service = from_mdns;
7467 0 : ctx->bdev_opts.from_discovery_service = true;
7468 0 : ctx->calling_thread = spdk_get_thread();
7469 0 : ctx->start_cb_fn = cb_fn;
7470 0 : ctx->cb_ctx = cb_ctx;
7471 0 : ctx->initializing = true;
7472 0 : if (ctx->start_cb_fn) {
7473 : /* We can use this when dumping json to denote if this RPC parameter
7474 : * was specified or not.
7475 : */
7476 0 : ctx->wait_for_attach = true;
7477 : }
7478 0 : if (attach_timeout != 0) {
7479 0 : ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
7480 0 : spdk_get_ticks_hz() / 1000ull;
7481 : }
7482 0 : TAILQ_INIT(&ctx->nvm_entry_ctxs);
7483 0 : TAILQ_INIT(&ctx->discovery_entry_ctxs);
7484 0 : memcpy(&ctx->trid, trid, sizeof(*trid));
7485 : /* Even if user did not specify hostnqn, we can still strdup("\0"); */
7486 0 : ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
7487 0 : if (ctx->hostnqn == NULL) {
7488 0 : free_discovery_ctx(ctx);
7489 0 : return -ENOMEM;
7490 : }
7491 0 : discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
7492 0 : if (discovery_entry_ctx == NULL) {
7493 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7494 0 : free_discovery_ctx(ctx);
7495 0 : return -ENOMEM;
7496 : }
7497 :
7498 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
7499 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
7500 0 : return 0;
7501 : }
7502 :
7503 : int
7504 0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
7505 : {
7506 : struct discovery_ctx *ctx;
7507 :
7508 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7509 0 : if (strcmp(name, ctx->name) == 0) {
7510 0 : if (ctx->stop) {
7511 0 : return -EALREADY;
7512 : }
7513 : /* If we're still starting the discovery service and ->rc is non-zero, we're
7514 : * going to stop it as soon as we can
7515 : */
7516 0 : if (ctx->initializing && ctx->rc != 0) {
7517 0 : return -EALREADY;
7518 : }
7519 0 : stop_discovery(ctx, cb_fn, cb_ctx);
7520 0 : return 0;
7521 : }
7522 : }
7523 :
7524 0 : return -ENOENT;
7525 : }
7526 :
7527 : static int
7528 1 : bdev_nvme_library_init(void)
7529 : {
7530 1 : g_bdev_nvme_init_thread = spdk_get_thread();
7531 :
7532 1 : spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
7533 : bdev_nvme_destroy_poll_group_cb,
7534 : sizeof(struct nvme_poll_group), "nvme_poll_groups");
7535 :
7536 1 : return 0;
7537 : }
7538 :
7539 : static void
7540 1 : bdev_nvme_fini_destruct_ctrlrs(void)
7541 : {
7542 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
7543 : struct nvme_ctrlr *nvme_ctrlr;
7544 :
7545 1 : pthread_mutex_lock(&g_bdev_nvme_mutex);
7546 1 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
7547 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
7548 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
7549 0 : if (nvme_ctrlr->destruct) {
7550 : /* This controller's destruction was already started
7551 : * before the application started shutting down
7552 : */
7553 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7554 0 : continue;
7555 : }
7556 0 : nvme_ctrlr->destruct = true;
7557 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7558 :
7559 0 : spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
7560 : nvme_ctrlr);
7561 : }
7562 : }
7563 :
7564 1 : g_bdev_nvme_module_finish = true;
7565 1 : if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
7566 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7567 1 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
7568 1 : spdk_bdev_module_fini_done();
7569 1 : return;
7570 : }
7571 :
7572 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7573 : }
7574 :
7575 : static void
7576 0 : check_discovery_fini(void *arg)
7577 : {
7578 0 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7579 0 : bdev_nvme_fini_destruct_ctrlrs();
7580 : }
7581 0 : }
7582 :
7583 : static void
7584 1 : bdev_nvme_library_fini(void)
7585 : {
7586 : struct nvme_probe_skip_entry *entry, *entry_tmp;
7587 : struct discovery_ctx *ctx;
7588 :
7589 1 : spdk_poller_unregister(&g_hotplug_poller);
7590 1 : free(g_hotplug_probe_ctx);
7591 1 : g_hotplug_probe_ctx = NULL;
7592 :
7593 1 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
7594 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
7595 0 : free(entry);
7596 : }
7597 :
7598 1 : assert(spdk_get_thread() == g_bdev_nvme_init_thread);
7599 1 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7600 1 : bdev_nvme_fini_destruct_ctrlrs();
7601 : } else {
7602 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7603 0 : stop_discovery(ctx, check_discovery_fini, NULL);
7604 : }
7605 : }
7606 1 : }
7607 :
7608 : static void
7609 0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
7610 : {
7611 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7612 0 : struct spdk_bdev *bdev = bdev_io->bdev;
7613 0 : struct spdk_dif_ctx dif_ctx;
7614 0 : struct spdk_dif_error err_blk = {};
7615 : int rc;
7616 0 : struct spdk_dif_ctx_init_ext_opts dif_opts;
7617 :
7618 0 : dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
7619 0 : dif_opts.dif_pi_format = bdev->dif_pi_format;
7620 0 : rc = spdk_dif_ctx_init(&dif_ctx,
7621 0 : bdev->blocklen, bdev->md_len, bdev->md_interleave,
7622 0 : bdev->dif_is_head_of_md, bdev->dif_type,
7623 : bdev_io->u.bdev.dif_check_flags,
7624 0 : bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
7625 0 : if (rc != 0) {
7626 0 : SPDK_ERRLOG("Initialization of DIF context failed\n");
7627 0 : return;
7628 : }
7629 :
7630 0 : if (bdev->md_interleave) {
7631 0 : rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7632 0 : bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7633 : } else {
7634 0 : struct iovec md_iov = {
7635 0 : .iov_base = bdev_io->u.bdev.md_buf,
7636 0 : .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
7637 : };
7638 :
7639 0 : rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7640 0 : &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7641 : }
7642 :
7643 0 : if (rc != 0) {
7644 0 : SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
7645 : err_blk.err_type, err_blk.err_offset);
7646 : } else {
7647 0 : SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
7648 : }
7649 : }
7650 :
7651 : static void
7652 0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7653 : {
7654 0 : struct nvme_bdev_io *bio = ref;
7655 :
7656 0 : if (spdk_nvme_cpl_is_success(cpl)) {
7657 : /* Run PI verification for read data buffer. */
7658 0 : bdev_nvme_verify_pi_error(bio);
7659 : }
7660 :
7661 : /* Return original completion status */
7662 0 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7663 0 : }
7664 :
7665 : static void
7666 3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7667 : {
7668 3 : struct nvme_bdev_io *bio = ref;
7669 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7670 : int ret;
7671 :
7672 3 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7673 0 : SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
7674 : cpl->status.sct, cpl->status.sc);
7675 :
7676 : /* Save completion status to use after verifying PI error. */
7677 0 : bio->cpl = *cpl;
7678 :
7679 0 : if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
7680 : /* Read without PI checking to verify PI error. */
7681 0 : ret = bdev_nvme_no_pi_readv(bio,
7682 : bdev_io->u.bdev.iovs,
7683 : bdev_io->u.bdev.iovcnt,
7684 : bdev_io->u.bdev.md_buf,
7685 : bdev_io->u.bdev.num_blocks,
7686 : bdev_io->u.bdev.offset_blocks);
7687 0 : if (ret == 0) {
7688 0 : return;
7689 : }
7690 : }
7691 : }
7692 :
7693 3 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7694 : }
7695 :
7696 : static void
7697 25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7698 : {
7699 25 : struct nvme_bdev_io *bio = ref;
7700 :
7701 25 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7702 0 : SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
7703 : cpl->status.sct, cpl->status.sc);
7704 : /* Run PI verification for write data buffer if PI error is detected. */
7705 0 : bdev_nvme_verify_pi_error(bio);
7706 : }
7707 :
7708 25 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7709 25 : }
7710 :
7711 : static void
7712 0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7713 : {
7714 0 : struct nvme_bdev_io *bio = ref;
7715 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7716 :
7717 : /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
7718 : * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
7719 : */
7720 0 : bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
7721 :
7722 0 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7723 0 : SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
7724 : cpl->status.sct, cpl->status.sc);
7725 : /* Run PI verification for zone append data buffer if PI error is detected. */
7726 0 : bdev_nvme_verify_pi_error(bio);
7727 : }
7728 :
7729 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7730 0 : }
7731 :
7732 : static void
7733 1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7734 : {
7735 1 : struct nvme_bdev_io *bio = ref;
7736 :
7737 1 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7738 0 : SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
7739 : cpl->status.sct, cpl->status.sc);
7740 : /* Run PI verification for compare data buffer if PI error is detected. */
7741 0 : bdev_nvme_verify_pi_error(bio);
7742 : }
7743 :
7744 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7745 1 : }
7746 :
7747 : static void
7748 4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7749 : {
7750 4 : struct nvme_bdev_io *bio = ref;
7751 :
7752 : /* Compare operation completion */
7753 4 : if (!bio->first_fused_completed) {
7754 : /* Save compare result for write callback */
7755 2 : bio->cpl = *cpl;
7756 2 : bio->first_fused_completed = true;
7757 2 : return;
7758 : }
7759 :
7760 : /* Write operation completion */
7761 2 : if (spdk_nvme_cpl_is_error(&bio->cpl)) {
7762 : /* If bio->cpl is already an error, it means the compare operation failed. In that case,
7763 : * complete the IO with the compare operation's status.
7764 : */
7765 1 : if (!spdk_nvme_cpl_is_error(cpl)) {
7766 1 : SPDK_ERRLOG("Unexpected write success after compare failure.\n");
7767 : }
7768 :
7769 1 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7770 : } else {
7771 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7772 : }
7773 : }
7774 :
/*
 * Generic completion callback: @ref is the nvme_bdev_io, which is completed
 * with the NVMe status carried by @cpl.
 */
static void
bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *nbio = (struct nvme_bdev_io *)ref;

	bdev_nvme_io_complete_nvme_status(nbio, cpl);
}
7782 :
7783 : static int
7784 0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
7785 : {
7786 0 : switch (desc->zt) {
7787 0 : case SPDK_NVME_ZONE_TYPE_SEQWR:
7788 0 : info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
7789 0 : break;
7790 0 : default:
7791 0 : SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
7792 0 : return -EIO;
7793 : }
7794 :
7795 0 : switch (desc->zs) {
7796 0 : case SPDK_NVME_ZONE_STATE_EMPTY:
7797 0 : info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
7798 0 : break;
7799 0 : case SPDK_NVME_ZONE_STATE_IOPEN:
7800 0 : info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
7801 0 : break;
7802 0 : case SPDK_NVME_ZONE_STATE_EOPEN:
7803 0 : info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
7804 0 : break;
7805 0 : case SPDK_NVME_ZONE_STATE_CLOSED:
7806 0 : info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
7807 0 : break;
7808 0 : case SPDK_NVME_ZONE_STATE_RONLY:
7809 0 : info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
7810 0 : break;
7811 0 : case SPDK_NVME_ZONE_STATE_FULL:
7812 0 : info->state = SPDK_BDEV_ZONE_STATE_FULL;
7813 0 : break;
7814 0 : case SPDK_NVME_ZONE_STATE_OFFLINE:
7815 0 : info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
7816 0 : break;
7817 0 : default:
7818 0 : SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
7819 0 : return -EIO;
7820 : }
7821 :
7822 0 : info->zone_id = desc->zslba;
7823 0 : info->write_pointer = desc->wp;
7824 0 : info->capacity = desc->zcap;
7825 :
7826 0 : return 0;
7827 : }
7828 :
7829 : static void
7830 0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
7831 : {
7832 0 : struct nvme_bdev_io *bio = ref;
7833 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7834 0 : uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
7835 0 : uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
7836 0 : struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
7837 : uint64_t max_zones_per_buf, i;
7838 : uint32_t zone_report_bufsize;
7839 : struct spdk_nvme_ns *ns;
7840 : struct spdk_nvme_qpair *qpair;
7841 : int ret;
7842 :
7843 0 : if (spdk_nvme_cpl_is_error(cpl)) {
7844 0 : goto out_complete_io_nvme_cpl;
7845 : }
7846 :
7847 0 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
7848 0 : ret = -ENXIO;
7849 0 : goto out_complete_io_ret;
7850 : }
7851 :
7852 0 : ns = bio->io_path->nvme_ns->ns;
7853 0 : qpair = bio->io_path->qpair->qpair;
7854 :
7855 0 : zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
7856 0 : max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
7857 : sizeof(bio->zone_report_buf->descs[0]);
7858 :
7859 0 : if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
7860 0 : ret = -EINVAL;
7861 0 : goto out_complete_io_ret;
7862 : }
7863 :
7864 0 : if (!bio->zone_report_buf->nr_zones) {
7865 0 : ret = -EINVAL;
7866 0 : goto out_complete_io_ret;
7867 : }
7868 :
7869 0 : for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
7870 0 : ret = fill_zone_from_report(&info[bio->handled_zones],
7871 0 : &bio->zone_report_buf->descs[i]);
7872 0 : if (ret) {
7873 0 : goto out_complete_io_ret;
7874 : }
7875 0 : bio->handled_zones++;
7876 : }
7877 :
7878 0 : if (bio->handled_zones < zones_to_copy) {
7879 0 : uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
7880 0 : uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
7881 :
7882 0 : memset(bio->zone_report_buf, 0, zone_report_bufsize);
7883 0 : ret = spdk_nvme_zns_report_zones(ns, qpair,
7884 0 : bio->zone_report_buf, zone_report_bufsize,
7885 : slba, SPDK_NVME_ZRA_LIST_ALL, true,
7886 : bdev_nvme_get_zone_info_done, bio);
7887 0 : if (!ret) {
7888 0 : return;
7889 : } else {
7890 0 : goto out_complete_io_ret;
7891 : }
7892 : }
7893 :
7894 0 : out_complete_io_nvme_cpl:
7895 0 : free(bio->zone_report_buf);
7896 0 : bio->zone_report_buf = NULL;
7897 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7898 0 : return;
7899 :
7900 0 : out_complete_io_ret:
7901 0 : free(bio->zone_report_buf);
7902 0 : bio->zone_report_buf = NULL;
7903 0 : bdev_nvme_io_complete(bio, ret);
7904 : }
7905 :
/*
 * Completion callback for zone management commands: completes the
 * nvme_bdev_io passed as @ref with the NVMe status in @cpl.
 */
static void
bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *nbio = (struct nvme_bdev_io *)ref;

	bdev_nvme_io_complete_nvme_status(nbio, cpl);
}
7913 :
7914 : static void
7915 4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
7916 : {
7917 4 : struct nvme_bdev_io *bio = ctx;
7918 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7919 4 : const struct spdk_nvme_cpl *cpl = &bio->cpl;
7920 :
7921 4 : assert(bdev_nvme_io_type_is_admin(bdev_io->type));
7922 :
7923 4 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
7924 4 : }
7925 :
7926 : static void
7927 3 : bdev_nvme_abort_complete(void *ctx)
7928 : {
7929 3 : struct nvme_bdev_io *bio = ctx;
7930 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7931 :
7932 3 : if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
7933 3 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
7934 : } else {
7935 0 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
7936 : }
7937 3 : }
7938 :
7939 : static void
7940 3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
7941 : {
7942 3 : struct nvme_bdev_io *bio = ref;
7943 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7944 :
7945 3 : bio->cpl = *cpl;
7946 3 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
7947 3 : }
7948 :
7949 : static void
7950 4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
7951 : {
7952 4 : struct nvme_bdev_io *bio = ref;
7953 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7954 :
7955 4 : bio->cpl = *cpl;
7956 4 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
7957 : bdev_nvme_admin_passthru_complete_nvme_status, bio);
7958 4 : }
7959 :
7960 : static void
7961 0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
7962 : {
7963 0 : struct nvme_bdev_io *bio = ref;
7964 : struct iovec *iov;
7965 :
7966 0 : bio->iov_offset = sgl_offset;
7967 0 : for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
7968 0 : iov = &bio->iovs[bio->iovpos];
7969 0 : if (bio->iov_offset < iov->iov_len) {
7970 0 : break;
7971 : }
7972 :
7973 0 : bio->iov_offset -= iov->iov_len;
7974 : }
7975 0 : }
7976 :
7977 : static int
7978 0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
7979 : {
7980 0 : struct nvme_bdev_io *bio = ref;
7981 : struct iovec *iov;
7982 :
7983 0 : assert(bio->iovpos < bio->iovcnt);
7984 :
7985 0 : iov = &bio->iovs[bio->iovpos];
7986 :
7987 0 : *address = iov->iov_base;
7988 0 : *length = iov->iov_len;
7989 :
7990 0 : if (bio->iov_offset) {
7991 0 : assert(bio->iov_offset <= iov->iov_len);
7992 0 : *address += bio->iov_offset;
7993 0 : *length -= bio->iov_offset;
7994 : }
7995 :
7996 0 : bio->iov_offset += *length;
7997 0 : if (bio->iov_offset == iov->iov_len) {
7998 0 : bio->iovpos++;
7999 0 : bio->iov_offset = 0;
8000 : }
8001 :
8002 0 : return 0;
8003 : }
8004 :
8005 : static void
8006 0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
8007 : {
8008 0 : struct nvme_bdev_io *bio = ref;
8009 : struct iovec *iov;
8010 :
8011 0 : bio->fused_iov_offset = sgl_offset;
8012 0 : for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
8013 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
8014 0 : if (bio->fused_iov_offset < iov->iov_len) {
8015 0 : break;
8016 : }
8017 :
8018 0 : bio->fused_iov_offset -= iov->iov_len;
8019 : }
8020 0 : }
8021 :
8022 : static int
8023 0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
8024 : {
8025 0 : struct nvme_bdev_io *bio = ref;
8026 : struct iovec *iov;
8027 :
8028 0 : assert(bio->fused_iovpos < bio->fused_iovcnt);
8029 :
8030 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
8031 :
8032 0 : *address = iov->iov_base;
8033 0 : *length = iov->iov_len;
8034 :
8035 0 : if (bio->fused_iov_offset) {
8036 0 : assert(bio->fused_iov_offset <= iov->iov_len);
8037 0 : *address += bio->fused_iov_offset;
8038 0 : *length -= bio->fused_iov_offset;
8039 : }
8040 :
8041 0 : bio->fused_iov_offset += *length;
8042 0 : if (bio->fused_iov_offset == iov->iov_len) {
8043 0 : bio->fused_iovpos++;
8044 0 : bio->fused_iov_offset = 0;
8045 : }
8046 :
8047 0 : return 0;
8048 : }
8049 :
8050 : static int
8051 0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8052 : void *md, uint64_t lba_count, uint64_t lba)
8053 : {
8054 : int rc;
8055 :
8056 0 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
8057 : lba_count, lba);
8058 :
8059 0 : bio->iovs = iov;
8060 0 : bio->iovcnt = iovcnt;
8061 0 : bio->iovpos = 0;
8062 0 : bio->iov_offset = 0;
8063 :
8064 0 : rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
8065 0 : bio->io_path->qpair->qpair,
8066 : lba, lba_count,
8067 : bdev_nvme_no_pi_readv_done, bio, 0,
8068 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
8069 : md, 0, 0);
8070 :
8071 0 : if (rc != 0 && rc != -ENOMEM) {
8072 0 : SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
8073 : }
8074 0 : return rc;
8075 : }
8076 :
8077 : static int
8078 3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8079 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
8080 : struct spdk_memory_domain *domain, void *domain_ctx,
8081 : struct spdk_accel_sequence *seq)
8082 : {
8083 3 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8084 3 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8085 : int rc;
8086 :
8087 3 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
8088 : lba_count, lba);
8089 :
8090 3 : bio->iovs = iov;
8091 3 : bio->iovcnt = iovcnt;
8092 3 : bio->iovpos = 0;
8093 3 : bio->iov_offset = 0;
8094 :
8095 3 : if (domain != NULL || seq != NULL) {
8096 1 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
8097 1 : bio->ext_opts.memory_domain = domain;
8098 1 : bio->ext_opts.memory_domain_ctx = domain_ctx;
8099 1 : bio->ext_opts.io_flags = flags;
8100 1 : bio->ext_opts.metadata = md;
8101 1 : bio->ext_opts.accel_sequence = seq;
8102 :
8103 1 : if (iovcnt == 1) {
8104 1 : rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
8105 : bio, &bio->ext_opts);
8106 : } else {
8107 0 : rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
8108 : bdev_nvme_readv_done, bio,
8109 : bdev_nvme_queued_reset_sgl,
8110 : bdev_nvme_queued_next_sge,
8111 : &bio->ext_opts);
8112 : }
8113 2 : } else if (iovcnt == 1) {
8114 2 : rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
8115 : md, lba, lba_count, bdev_nvme_readv_done,
8116 : bio, flags, 0, 0);
8117 : } else {
8118 0 : rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
8119 : bdev_nvme_readv_done, bio, flags,
8120 : bdev_nvme_queued_reset_sgl,
8121 : bdev_nvme_queued_next_sge, md, 0, 0);
8122 : }
8123 :
8124 3 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
8125 0 : SPDK_ERRLOG("readv failed: rc = %d\n", rc);
8126 : }
8127 3 : return rc;
8128 : }
8129 :
8130 : static int
8131 25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8132 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
8133 : struct spdk_memory_domain *domain, void *domain_ctx,
8134 : struct spdk_accel_sequence *seq,
8135 : union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13)
8136 : {
8137 25 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8138 25 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8139 : int rc;
8140 :
8141 25 : SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
8142 : lba_count, lba);
8143 :
8144 25 : bio->iovs = iov;
8145 25 : bio->iovcnt = iovcnt;
8146 25 : bio->iovpos = 0;
8147 25 : bio->iov_offset = 0;
8148 :
8149 25 : if (domain != NULL || seq != NULL) {
8150 0 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
8151 0 : bio->ext_opts.memory_domain = domain;
8152 0 : bio->ext_opts.memory_domain_ctx = domain_ctx;
8153 0 : bio->ext_opts.io_flags = flags | SPDK_NVME_IO_FLAGS_DIRECTIVE(cdw12.write.dtype);
8154 0 : bio->ext_opts.cdw13 = cdw13.raw;
8155 0 : bio->ext_opts.metadata = md;
8156 0 : bio->ext_opts.accel_sequence = seq;
8157 :
8158 0 : if (iovcnt == 1) {
8159 0 : rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
8160 : bio, &bio->ext_opts);
8161 : } else {
8162 0 : rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
8163 : bdev_nvme_writev_done, bio,
8164 : bdev_nvme_queued_reset_sgl,
8165 : bdev_nvme_queued_next_sge,
8166 : &bio->ext_opts);
8167 : }
8168 25 : } else if (iovcnt == 1) {
8169 25 : rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
8170 : md, lba, lba_count, bdev_nvme_writev_done,
8171 : bio, flags, 0, 0);
8172 : } else {
8173 0 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
8174 : bdev_nvme_writev_done, bio, flags,
8175 : bdev_nvme_queued_reset_sgl,
8176 : bdev_nvme_queued_next_sge, md, 0, 0);
8177 : }
8178 :
8179 25 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
8180 0 : SPDK_ERRLOG("writev failed: rc = %d\n", rc);
8181 : }
8182 25 : return rc;
8183 : }
8184 :
8185 : static int
8186 0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8187 : void *md, uint64_t lba_count, uint64_t zslba,
8188 : uint32_t flags)
8189 : {
8190 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8191 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8192 : int rc;
8193 :
8194 0 : SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
8195 : lba_count, zslba);
8196 :
8197 0 : bio->iovs = iov;
8198 0 : bio->iovcnt = iovcnt;
8199 0 : bio->iovpos = 0;
8200 0 : bio->iov_offset = 0;
8201 :
8202 0 : if (iovcnt == 1) {
8203 0 : rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
8204 : lba_count,
8205 : bdev_nvme_zone_appendv_done, bio,
8206 : flags,
8207 : 0, 0);
8208 : } else {
8209 0 : rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
8210 : bdev_nvme_zone_appendv_done, bio, flags,
8211 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
8212 : md, 0, 0);
8213 : }
8214 :
8215 0 : if (rc != 0 && rc != -ENOMEM) {
8216 0 : SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
8217 : }
8218 0 : return rc;
8219 : }
8220 :
8221 : static int
8222 1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8223 : void *md, uint64_t lba_count, uint64_t lba,
8224 : uint32_t flags)
8225 : {
8226 : int rc;
8227 :
8228 1 : SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
8229 : lba_count, lba);
8230 :
8231 1 : bio->iovs = iov;
8232 1 : bio->iovcnt = iovcnt;
8233 1 : bio->iovpos = 0;
8234 1 : bio->iov_offset = 0;
8235 :
8236 1 : rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
8237 1 : bio->io_path->qpair->qpair,
8238 : lba, lba_count,
8239 : bdev_nvme_comparev_done, bio, flags,
8240 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
8241 : md, 0, 0);
8242 :
8243 1 : if (rc != 0 && rc != -ENOMEM) {
8244 0 : SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
8245 : }
8246 1 : return rc;
8247 : }
8248 :
8249 : static int
8250 2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
8251 : struct iovec *write_iov, int write_iovcnt,
8252 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
8253 : {
8254 2 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8255 2 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8256 2 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
8257 : int rc;
8258 :
8259 2 : SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
8260 : lba_count, lba);
8261 :
8262 2 : bio->iovs = cmp_iov;
8263 2 : bio->iovcnt = cmp_iovcnt;
8264 2 : bio->iovpos = 0;
8265 2 : bio->iov_offset = 0;
8266 2 : bio->fused_iovs = write_iov;
8267 2 : bio->fused_iovcnt = write_iovcnt;
8268 2 : bio->fused_iovpos = 0;
8269 2 : bio->fused_iov_offset = 0;
8270 :
8271 2 : if (bdev_io->num_retries == 0) {
8272 2 : bio->first_fused_submitted = false;
8273 2 : bio->first_fused_completed = false;
8274 : }
8275 :
8276 2 : if (!bio->first_fused_submitted) {
8277 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
8278 2 : memset(&bio->cpl, 0, sizeof(bio->cpl));
8279 :
8280 2 : rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
8281 : bdev_nvme_comparev_and_writev_done, bio, flags,
8282 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
8283 2 : if (rc == 0) {
8284 2 : bio->first_fused_submitted = true;
8285 2 : flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
8286 : } else {
8287 0 : if (rc != -ENOMEM) {
8288 0 : SPDK_ERRLOG("compare failed: rc = %d\n", rc);
8289 : }
8290 0 : return rc;
8291 : }
8292 : }
8293 :
8294 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
8295 :
8296 2 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
8297 : bdev_nvme_comparev_and_writev_done, bio, flags,
8298 : bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
8299 2 : if (rc != 0 && rc != -ENOMEM) {
8300 0 : SPDK_ERRLOG("write failed: rc = %d\n", rc);
8301 0 : rc = 0;
8302 : }
8303 :
8304 2 : return rc;
8305 : }
8306 :
8307 : static int
8308 1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
8309 : {
8310 1 : struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
8311 : struct spdk_nvme_dsm_range *range;
8312 : uint64_t offset, remaining;
8313 : uint64_t num_ranges_u64;
8314 : uint16_t num_ranges;
8315 : int rc;
8316 :
8317 1 : num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
8318 : SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8319 1 : if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
8320 0 : SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
8321 0 : return -EINVAL;
8322 : }
8323 1 : num_ranges = (uint16_t)num_ranges_u64;
8324 :
8325 1 : offset = offset_blocks;
8326 1 : remaining = num_blocks;
8327 1 : range = &dsm_ranges[0];
8328 :
8329 : /* Fill max-size ranges until the remaining blocks fit into one range */
8330 1 : while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
8331 0 : range->attributes.raw = 0;
8332 0 : range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8333 0 : range->starting_lba = offset;
8334 :
8335 0 : offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8336 0 : remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8337 0 : range++;
8338 : }
8339 :
8340 : /* Final range describes the remaining blocks */
8341 1 : range->attributes.raw = 0;
8342 1 : range->length = remaining;
8343 1 : range->starting_lba = offset;
8344 :
8345 1 : rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
8346 1 : bio->io_path->qpair->qpair,
8347 : SPDK_NVME_DSM_ATTR_DEALLOCATE,
8348 : dsm_ranges, num_ranges,
8349 : bdev_nvme_queued_done, bio);
8350 :
8351 1 : return rc;
8352 : }
8353 :
8354 : static int
8355 0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
8356 : {
8357 0 : if (num_blocks > UINT16_MAX + 1) {
8358 0 : SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
8359 0 : return -EINVAL;
8360 : }
8361 :
8362 0 : return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
8363 0 : bio->io_path->qpair->qpair,
8364 : offset_blocks, num_blocks,
8365 : bdev_nvme_queued_done, bio,
8366 : 0);
8367 : }
8368 :
8369 : static int
8370 0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
8371 : struct spdk_bdev_zone_info *info)
8372 : {
8373 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8374 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8375 0 : uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
8376 0 : uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
8377 0 : uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
8378 :
8379 0 : if (zone_id % zone_size != 0) {
8380 0 : return -EINVAL;
8381 : }
8382 :
8383 0 : if (num_zones > total_zones || !num_zones) {
8384 0 : return -EINVAL;
8385 : }
8386 :
8387 0 : assert(!bio->zone_report_buf);
8388 0 : bio->zone_report_buf = calloc(1, zone_report_bufsize);
8389 0 : if (!bio->zone_report_buf) {
8390 0 : return -ENOMEM;
8391 : }
8392 :
8393 0 : bio->handled_zones = 0;
8394 :
8395 0 : return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
8396 : zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
8397 : bdev_nvme_get_zone_info_done, bio);
8398 : }
8399 :
8400 : static int
8401 0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
8402 : enum spdk_bdev_zone_action action)
8403 : {
8404 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8405 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8406 :
8407 0 : switch (action) {
8408 0 : case SPDK_BDEV_ZONE_CLOSE:
8409 0 : return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
8410 : bdev_nvme_zone_management_done, bio);
8411 0 : case SPDK_BDEV_ZONE_FINISH:
8412 0 : return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
8413 : bdev_nvme_zone_management_done, bio);
8414 0 : case SPDK_BDEV_ZONE_OPEN:
8415 0 : return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
8416 : bdev_nvme_zone_management_done, bio);
8417 0 : case SPDK_BDEV_ZONE_RESET:
8418 0 : return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
8419 : bdev_nvme_zone_management_done, bio);
8420 0 : case SPDK_BDEV_ZONE_OFFLINE:
8421 0 : return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
8422 : bdev_nvme_zone_management_done, bio);
8423 0 : default:
8424 0 : return -EINVAL;
8425 : }
8426 : }
8427 :
8428 : static void
8429 5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
8430 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
8431 : {
8432 : struct nvme_io_path *io_path;
8433 : struct nvme_ctrlr *nvme_ctrlr;
8434 : uint32_t max_xfer_size;
8435 5 : int rc = -ENXIO;
8436 :
8437 : /* Choose the first ctrlr which is not failed. */
8438 8 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
8439 7 : nvme_ctrlr = io_path->qpair->ctrlr;
8440 :
8441 : /* We should skip any unavailable nvme_ctrlr rather than checking
8442 : * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
8443 : */
8444 7 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
8445 3 : continue;
8446 : }
8447 :
8448 4 : max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
8449 :
8450 4 : if (nbytes > max_xfer_size) {
8451 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8452 0 : rc = -EINVAL;
8453 0 : goto err;
8454 : }
8455 :
8456 4 : rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
8457 : bdev_nvme_admin_passthru_done, bio);
8458 4 : if (rc == 0) {
8459 4 : return;
8460 : }
8461 : }
8462 :
8463 1 : err:
8464 1 : bdev_nvme_admin_complete(bio, rc);
8465 : }
8466 :
8467 : static int
8468 0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
8469 : void *buf, size_t nbytes)
8470 : {
8471 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8472 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8473 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8474 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8475 :
8476 0 : if (nbytes > max_xfer_size) {
8477 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8478 0 : return -EINVAL;
8479 : }
8480 :
8481 : /*
8482 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
8483 : * so fill it out automatically.
8484 : */
8485 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8486 :
8487 0 : return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
8488 : (uint32_t)nbytes, bdev_nvme_queued_done, bio);
8489 : }
8490 :
8491 : static int
8492 0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
8493 : void *buf, size_t nbytes, void *md_buf, size_t md_len)
8494 : {
8495 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8496 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8497 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
8498 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8499 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8500 :
8501 0 : if (nbytes > max_xfer_size) {
8502 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8503 0 : return -EINVAL;
8504 : }
8505 :
8506 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
8507 0 : SPDK_ERRLOG("invalid meta data buffer size\n");
8508 0 : return -EINVAL;
8509 : }
8510 :
8511 : /*
8512 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
8513 : * so fill it out automatically.
8514 : */
8515 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8516 :
8517 0 : return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
8518 : (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
8519 : }
8520 :
8521 : static int
8522 0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
8523 : struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
8524 : size_t nbytes, void *md_buf, size_t md_len)
8525 : {
8526 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8527 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8528 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
8529 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8530 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8531 :
8532 0 : bio->iovs = iov;
8533 0 : bio->iovcnt = iovcnt;
8534 0 : bio->iovpos = 0;
8535 0 : bio->iov_offset = 0;
8536 :
8537 0 : if (nbytes > max_xfer_size) {
8538 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8539 0 : return -EINVAL;
8540 : }
8541 :
8542 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
8543 0 : SPDK_ERRLOG("invalid meta data buffer size\n");
8544 0 : return -EINVAL;
8545 : }
8546 :
8547 : /*
8548 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
8549 : * require a nsid, so fill it out automatically.
8550 : */
8551 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8552 :
8553 0 : return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
8554 : ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
8555 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
8556 : }
8557 :
8558 : static void
8559 6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
8560 : struct nvme_bdev_io *bio_to_abort)
8561 : {
8562 : struct nvme_io_path *io_path;
8563 6 : int rc = 0;
8564 :
8565 6 : rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
8566 6 : if (rc == 0) {
8567 1 : bdev_nvme_admin_complete(bio, 0);
8568 1 : return;
8569 : }
8570 :
8571 5 : io_path = bio_to_abort->io_path;
8572 5 : if (io_path != NULL) {
8573 3 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8574 3 : io_path->qpair->qpair,
8575 : bio_to_abort,
8576 : bdev_nvme_abort_done, bio);
8577 : } else {
8578 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
8579 2 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8580 : NULL,
8581 : bio_to_abort,
8582 : bdev_nvme_abort_done, bio);
8583 :
8584 2 : if (rc != -ENOENT) {
8585 1 : break;
8586 : }
8587 : }
8588 : }
8589 :
8590 5 : if (rc != 0) {
8591 : /* If no command was found or there was any error, complete the abort
8592 : * request with failure.
8593 : */
8594 2 : bdev_nvme_admin_complete(bio, rc);
8595 : }
8596 : }
8597 :
8598 : static int
8599 0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
8600 : uint64_t num_blocks)
8601 : {
8602 0 : struct spdk_nvme_scc_source_range range = {
8603 : .slba = src_offset_blocks,
8604 0 : .nlb = num_blocks - 1
8605 : };
8606 :
8607 0 : return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
8608 0 : bio->io_path->qpair->qpair,
8609 : &range, 1, dst_offset_blocks,
8610 : bdev_nvme_queued_done, bio);
8611 : }
8612 :
8613 : static void
8614 0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
8615 : {
8616 : const char *action;
8617 : uint32_t i;
8618 :
8619 0 : if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
8620 0 : action = "reset";
8621 0 : } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
8622 0 : action = "abort";
8623 : } else {
8624 0 : action = "none";
8625 : }
8626 :
8627 0 : spdk_json_write_object_begin(w);
8628 :
8629 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
8630 :
8631 0 : spdk_json_write_named_object_begin(w, "params");
8632 0 : spdk_json_write_named_string(w, "action_on_timeout", action);
8633 0 : spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
8634 0 : spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
8635 0 : spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
8636 0 : spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
8637 0 : spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
8638 0 : spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
8639 0 : spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
8640 0 : spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
8641 0 : spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
8642 0 : spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
8643 0 : spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
8644 0 : spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
8645 0 : spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
8646 0 : spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
8647 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
8648 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
8649 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
8650 0 : spdk_json_write_named_bool(w, "disable_auto_failback", g_opts.disable_auto_failback);
8651 0 : spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
8652 0 : spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
8653 0 : spdk_json_write_named_bool(w, "nvme_error_stat", g_opts.nvme_error_stat);
8654 0 : spdk_json_write_named_uint32(w, "rdma_srq_size", g_opts.rdma_srq_size);
8655 0 : spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
8656 0 : spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
8657 0 : spdk_json_write_named_uint32(w, "rdma_max_cq_size", g_opts.rdma_max_cq_size);
8658 0 : spdk_json_write_named_uint16(w, "rdma_cm_event_timeout_ms", g_opts.rdma_cm_event_timeout_ms);
8659 0 : spdk_json_write_named_array_begin(w, "dhchap_digests");
8660 0 : for (i = 0; i < 32; ++i) {
8661 0 : if (g_opts.dhchap_digests & SPDK_BIT(i)) {
8662 0 : spdk_json_write_string(w, spdk_nvme_dhchap_get_digest_name(i));
8663 : }
8664 : }
8665 0 : spdk_json_write_array_end(w);
8666 0 : spdk_json_write_named_array_begin(w, "dhchap_dhgroups");
8667 0 : for (i = 0; i < 32; ++i) {
8668 0 : if (g_opts.dhchap_dhgroups & SPDK_BIT(i)) {
8669 0 : spdk_json_write_string(w, spdk_nvme_dhchap_get_dhgroup_name(i));
8670 : }
8671 : }
8672 :
8673 0 : spdk_json_write_array_end(w);
8674 0 : spdk_json_write_object_end(w);
8675 :
8676 0 : spdk_json_write_object_end(w);
8677 0 : }
8678 :
8679 : static void
8680 0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
8681 : {
8682 0 : struct spdk_nvme_transport_id trid;
8683 :
8684 0 : spdk_json_write_object_begin(w);
8685 :
8686 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
8687 :
8688 0 : spdk_json_write_named_object_begin(w, "params");
8689 0 : spdk_json_write_named_string(w, "name", ctx->name);
8690 0 : spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
8691 :
8692 0 : trid = ctx->trid;
8693 0 : memset(trid.subnqn, 0, sizeof(trid.subnqn));
8694 0 : nvme_bdev_dump_trid_json(&trid, w);
8695 :
8696 0 : spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
8697 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
8698 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
8699 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8700 : ctx->bdev_opts.fast_io_fail_timeout_sec);
8701 0 : spdk_json_write_object_end(w);
8702 :
8703 0 : spdk_json_write_object_end(w);
8704 0 : }
8705 :
#ifdef SPDK_CONFIG_NVME_CUSE
/*
 * Write a "bdev_nvme_cuse_register" RPC object to w for nvme_ctrlr.
 *
 * Nothing is written when spdk_nvme_cuse_get_ctrlr_name() fails for the
 * controller.  The retrieved CUSE name itself is not emitted; the lookup
 * only serves as the check, and the RPC is keyed by the nvme_bdev_ctrlr name.
 */
static void
nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
			    struct nvme_ctrlr *nvme_ctrlr)
{
	/* Plain fixed-size array (not a VLA): the capacity is a compile-time
	 * constant and the size passed to the lookup is derived from it.
	 */
	char cuse_name[128];
	size_t cuse_name_size = sizeof(cuse_name);

	if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
					  cuse_name, &cuse_name_size) != 0) {
		return;
	}

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
#endif
8730 :
8731 : static void
8732 0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
8733 : struct nvme_ctrlr *nvme_ctrlr,
8734 : struct nvme_path_id *path_id)
8735 : {
8736 : struct spdk_nvme_transport_id *trid;
8737 : const struct spdk_nvme_ctrlr_opts *opts;
8738 :
8739 0 : if (nvme_ctrlr->opts.from_discovery_service) {
8740 : /* Do not emit an RPC for this - it will be implicitly
8741 : * covered by a separate bdev_nvme_start_discovery or
8742 : * bdev_nvme_start_mdns_discovery RPC.
8743 : */
8744 0 : return;
8745 : }
8746 :
8747 0 : trid = &path_id->trid;
8748 :
8749 0 : spdk_json_write_object_begin(w);
8750 :
8751 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
8752 :
8753 0 : spdk_json_write_named_object_begin(w, "params");
8754 0 : spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
8755 0 : nvme_bdev_dump_trid_json(trid, w);
8756 0 : spdk_json_write_named_bool(w, "prchk_reftag",
8757 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
8758 0 : spdk_json_write_named_bool(w, "prchk_guard",
8759 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
8760 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
8761 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
8762 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8763 : nvme_ctrlr->opts.fast_io_fail_timeout_sec);
8764 0 : if (nvme_ctrlr->psk != NULL) {
8765 0 : spdk_json_write_named_string(w, "psk", spdk_key_get_name(nvme_ctrlr->psk));
8766 : }
8767 0 : if (nvme_ctrlr->dhchap_key != NULL) {
8768 0 : spdk_json_write_named_string(w, "dhchap_key",
8769 : spdk_key_get_name(nvme_ctrlr->dhchap_key));
8770 : }
8771 0 : if (nvme_ctrlr->dhchap_ctrlr_key != NULL) {
8772 0 : spdk_json_write_named_string(w, "dhchap_ctrlr_key",
8773 : spdk_key_get_name(nvme_ctrlr->dhchap_ctrlr_key));
8774 : }
8775 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
8776 0 : spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
8777 0 : spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
8778 0 : spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
8779 0 : if (opts->src_addr[0] != '\0') {
8780 0 : spdk_json_write_named_string(w, "hostaddr", opts->src_addr);
8781 : }
8782 0 : if (opts->src_svcid[0] != '\0') {
8783 0 : spdk_json_write_named_string(w, "hostsvcid", opts->src_svcid);
8784 : }
8785 :
8786 0 : if (nvme_ctrlr->opts.multipath) {
8787 0 : spdk_json_write_named_string(w, "multipath", "multipath");
8788 : }
8789 0 : spdk_json_write_object_end(w);
8790 :
8791 0 : spdk_json_write_object_end(w);
8792 : }
8793 :
8794 : static void
8795 0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
8796 : {
8797 0 : spdk_json_write_object_begin(w);
8798 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
8799 :
8800 0 : spdk_json_write_named_object_begin(w, "params");
8801 0 : spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
8802 0 : spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
8803 0 : spdk_json_write_object_end(w);
8804 :
8805 0 : spdk_json_write_object_end(w);
8806 0 : }
8807 :
8808 : static int
8809 0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
8810 : {
8811 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
8812 : struct nvme_ctrlr *nvme_ctrlr;
8813 : struct discovery_ctx *ctx;
8814 : struct nvme_path_id *path_id;
8815 :
8816 0 : bdev_nvme_opts_config_json(w);
8817 :
8818 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
8819 :
8820 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
8821 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
8822 0 : path_id = nvme_ctrlr->active_path_id;
8823 0 : assert(path_id == TAILQ_FIRST(&nvme_ctrlr->trids));
8824 0 : nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
8825 :
8826 0 : path_id = TAILQ_NEXT(path_id, link);
8827 0 : while (path_id != NULL) {
8828 0 : nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
8829 0 : path_id = TAILQ_NEXT(path_id, link);
8830 : }
8831 :
8832 : #ifdef SPDK_CONFIG_NVME_CUSE
8833 0 : nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
8834 : #endif
8835 : }
8836 : }
8837 :
8838 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
8839 0 : if (!ctx->from_mdns_discovery_service) {
8840 0 : bdev_nvme_discovery_config_json(w, ctx);
8841 : }
8842 : }
8843 :
8844 0 : bdev_nvme_mdns_discovery_config_json(w);
8845 :
8846 : /* Dump as last parameter to give all NVMe bdevs chance to be constructed
8847 : * before enabling hotplug poller.
8848 : */
8849 0 : bdev_nvme_hotplug_config_json(w);
8850 :
8851 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
8852 0 : return 0;
8853 : }
8854 :
8855 : struct spdk_nvme_ctrlr *
8856 1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
8857 : {
8858 : struct nvme_bdev *nbdev;
8859 : struct nvme_ns *nvme_ns;
8860 :
8861 1 : if (!bdev || bdev->module != &nvme_if) {
8862 0 : return NULL;
8863 : }
8864 :
8865 1 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
8866 1 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
8867 1 : assert(nvme_ns != NULL);
8868 :
8869 1 : return nvme_ns->ctrlr->ctrlr;
8870 : }
8871 :
8872 : static bool
8873 12 : nvme_io_path_is_current(struct nvme_io_path *io_path)
8874 : {
8875 : const struct nvme_bdev_channel *nbdev_ch;
8876 : bool current;
8877 :
8878 12 : if (!nvme_io_path_is_available(io_path)) {
8879 4 : return false;
8880 : }
8881 :
8882 8 : nbdev_ch = io_path->nbdev_ch;
8883 8 : if (nbdev_ch == NULL) {
8884 1 : current = false;
8885 7 : } else if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
8886 3 : struct nvme_io_path *optimized_io_path = NULL;
8887 :
8888 6 : STAILQ_FOREACH(optimized_io_path, &nbdev_ch->io_path_list, stailq) {
8889 5 : if (optimized_io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) {
8890 2 : break;
8891 : }
8892 : }
8893 :
8894 : /* A non-optimized path is only current if there are no optimized paths. */
8895 3 : current = (io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) ||
8896 : (optimized_io_path == NULL);
8897 : } else {
8898 4 : if (nbdev_ch->current_io_path) {
8899 1 : current = (io_path == nbdev_ch->current_io_path);
8900 : } else {
8901 : struct nvme_io_path *first_path;
8902 :
8903 : /* We arrived here as there are no optimized paths for active-passive
8904 : * mode. Check if this io_path is the first one available on the list.
8905 : */
8906 3 : current = false;
8907 3 : STAILQ_FOREACH(first_path, &nbdev_ch->io_path_list, stailq) {
8908 3 : if (nvme_io_path_is_available(first_path)) {
8909 3 : current = (io_path == first_path);
8910 3 : break;
8911 : }
8912 : }
8913 : }
8914 : }
8915 :
8916 8 : return current;
8917 : }
8918 :
8919 : static struct nvme_ctrlr *
8920 0 : bdev_nvme_next_ctrlr_unsafe(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct nvme_ctrlr *prev)
8921 : {
8922 : struct nvme_ctrlr *next;
8923 :
8924 : /* Must be called under g_bdev_nvme_mutex */
8925 0 : next = prev != NULL ? TAILQ_NEXT(prev, tailq) : TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
8926 0 : while (next != NULL) {
8927 : /* ref can be 0 when the ctrlr was released, but hasn't been detached yet */
8928 0 : pthread_mutex_lock(&next->mutex);
8929 0 : if (next->ref > 0) {
8930 0 : next->ref++;
8931 0 : pthread_mutex_unlock(&next->mutex);
8932 0 : return next;
8933 : }
8934 :
8935 0 : pthread_mutex_unlock(&next->mutex);
8936 0 : next = TAILQ_NEXT(next, tailq);
8937 : }
8938 :
8939 0 : return NULL;
8940 : }
8941 :
/* State carried through an asynchronous bdev_nvme_set_keys() operation. */
struct bdev_nvme_set_keys_ctx {
	/* Controller currently being processed; a reference is held on it. */
	struct nvme_ctrlr	*nctrlr;
	/* Keyring references (either may be NULL); dropped when the ctx is freed. */
	struct spdk_key		*dhchap_key;
	struct spdk_key		*dhchap_ctrlr_key;
	/* Thread that called bdev_nvme_set_keys(); cb_fn is executed on it. */
	struct spdk_thread	*thread;
	/* Completion callback and its argument. */
	bdev_nvme_set_keys_cb	cb_fn;
	void			*cb_ctx;
	/* Result handed to cb_fn. */
	int			status;
};
8951 :
8952 : static void
8953 0 : bdev_nvme_free_set_keys_ctx(struct bdev_nvme_set_keys_ctx *ctx)
8954 : {
8955 0 : if (ctx == NULL) {
8956 0 : return;
8957 : }
8958 :
8959 0 : spdk_keyring_put_key(ctx->dhchap_key);
8960 0 : spdk_keyring_put_key(ctx->dhchap_ctrlr_key);
8961 0 : free(ctx);
8962 : }
8963 :
8964 : static void
8965 0 : _bdev_nvme_set_keys_done(void *_ctx)
8966 : {
8967 0 : struct bdev_nvme_set_keys_ctx *ctx = _ctx;
8968 :
8969 0 : ctx->cb_fn(ctx->cb_ctx, ctx->status);
8970 :
8971 0 : if (ctx->nctrlr != NULL) {
8972 0 : nvme_ctrlr_release(ctx->nctrlr);
8973 : }
8974 0 : bdev_nvme_free_set_keys_ctx(ctx);
8975 0 : }
8976 :
8977 : static void
8978 0 : bdev_nvme_set_keys_done(struct bdev_nvme_set_keys_ctx *ctx, int status)
8979 : {
8980 0 : ctx->status = status;
8981 0 : spdk_thread_exec_msg(ctx->thread, _bdev_nvme_set_keys_done, ctx);
8982 0 : }
8983 :
8984 : static void bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx);
8985 :
8986 : static void
8987 0 : bdev_nvme_authenticate_ctrlr_continue(struct bdev_nvme_set_keys_ctx *ctx)
8988 : {
8989 : struct nvme_ctrlr *next;
8990 :
8991 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
8992 0 : next = bdev_nvme_next_ctrlr_unsafe(NULL, ctx->nctrlr);
8993 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
8994 :
8995 0 : nvme_ctrlr_release(ctx->nctrlr);
8996 0 : ctx->nctrlr = next;
8997 :
8998 0 : if (next == NULL) {
8999 0 : bdev_nvme_set_keys_done(ctx, 0);
9000 : } else {
9001 0 : bdev_nvme_authenticate_ctrlr(ctx);
9002 : }
9003 0 : }
9004 :
/*
 * Completion of the per-channel qpair authentication pass for one
 * controller: a non-zero status ends the whole set-keys operation with that
 * status, otherwise the operation moves on to the next controller.
 */
static void
bdev_nvme_authenticate_qpairs_done(struct spdk_io_channel_iter *i, int status)
{
	struct bdev_nvme_set_keys_ctx *keys_ctx = spdk_io_channel_iter_get_ctx(i);

	if (status == 0) {
		bdev_nvme_authenticate_ctrlr_continue(keys_ctx);
	} else {
		bdev_nvme_set_keys_done(keys_ctx, status);
	}
}
9016 :
/*
 * Authentication callback for a single qpair.  ctx is the channel iterator
 * that was passed to spdk_nvme_qpair_authenticate(); the iteration is
 * resumed with the authentication status.
 */
static void
bdev_nvme_authenticate_qpair_done(void *ctx, int status)
{
	struct spdk_io_channel_iter *iter = ctx;

	spdk_for_each_channel_continue(iter, status);
}
9022 :
9023 : static void
9024 0 : bdev_nvme_authenticate_qpair(struct spdk_io_channel_iter *i)
9025 : {
9026 0 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
9027 0 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
9028 0 : struct nvme_qpair *qpair = ctrlr_ch->qpair;
9029 : int rc;
9030 :
9031 0 : if (!nvme_qpair_is_connected(qpair)) {
9032 0 : spdk_for_each_channel_continue(i, 0);
9033 0 : return;
9034 : }
9035 :
9036 0 : rc = spdk_nvme_qpair_authenticate(qpair->qpair, bdev_nvme_authenticate_qpair_done, i);
9037 0 : if (rc != 0) {
9038 0 : spdk_for_each_channel_continue(i, rc);
9039 : }
9040 : }
9041 :
9042 : static void
9043 0 : bdev_nvme_authenticate_ctrlr_done(void *_ctx, int status)
9044 : {
9045 0 : struct bdev_nvme_set_keys_ctx *ctx = _ctx;
9046 :
9047 0 : if (status != 0) {
9048 0 : bdev_nvme_set_keys_done(ctx, status);
9049 0 : return;
9050 : }
9051 :
9052 0 : spdk_for_each_channel(ctx->nctrlr, bdev_nvme_authenticate_qpair, ctx,
9053 : bdev_nvme_authenticate_qpairs_done);
9054 : }
9055 :
9056 : static void
9057 0 : bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx)
9058 : {
9059 0 : struct spdk_nvme_ctrlr_key_opts opts = {};
9060 0 : struct nvme_ctrlr *nctrlr = ctx->nctrlr;
9061 : int rc;
9062 :
9063 0 : opts.size = SPDK_SIZEOF(&opts, dhchap_ctrlr_key);
9064 0 : opts.dhchap_key = ctx->dhchap_key;
9065 0 : opts.dhchap_ctrlr_key = ctx->dhchap_ctrlr_key;
9066 0 : rc = spdk_nvme_ctrlr_set_keys(nctrlr->ctrlr, &opts);
9067 0 : if (rc != 0) {
9068 0 : bdev_nvme_set_keys_done(ctx, rc);
9069 0 : return;
9070 : }
9071 :
9072 0 : if (ctx->dhchap_key != NULL) {
9073 0 : rc = spdk_nvme_ctrlr_authenticate(nctrlr->ctrlr,
9074 : bdev_nvme_authenticate_ctrlr_done, ctx);
9075 0 : if (rc != 0) {
9076 0 : bdev_nvme_set_keys_done(ctx, rc);
9077 : }
9078 : } else {
9079 0 : bdev_nvme_authenticate_ctrlr_continue(ctx);
9080 : }
9081 : }
9082 :
9083 : int
9084 0 : bdev_nvme_set_keys(const char *name, const char *dhchap_key, const char *dhchap_ctrlr_key,
9085 : bdev_nvme_set_keys_cb cb_fn, void *cb_ctx)
9086 : {
9087 : struct bdev_nvme_set_keys_ctx *ctx;
9088 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
9089 : struct nvme_ctrlr *nctrlr;
9090 :
9091 0 : ctx = calloc(1, sizeof(*ctx));
9092 0 : if (ctx == NULL) {
9093 0 : return -ENOMEM;
9094 : }
9095 :
9096 0 : if (dhchap_key != NULL) {
9097 0 : ctx->dhchap_key = spdk_keyring_get_key(dhchap_key);
9098 0 : if (ctx->dhchap_key == NULL) {
9099 0 : SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_key, name);
9100 0 : bdev_nvme_free_set_keys_ctx(ctx);
9101 0 : return -ENOKEY;
9102 : }
9103 : }
9104 0 : if (dhchap_ctrlr_key != NULL) {
9105 0 : ctx->dhchap_ctrlr_key = spdk_keyring_get_key(dhchap_ctrlr_key);
9106 0 : if (ctx->dhchap_ctrlr_key == NULL) {
9107 0 : SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_ctrlr_key, name);
9108 0 : bdev_nvme_free_set_keys_ctx(ctx);
9109 0 : return -ENOKEY;
9110 : }
9111 : }
9112 :
9113 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
9114 0 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
9115 0 : if (nbdev_ctrlr == NULL) {
9116 0 : SPDK_ERRLOG("Could not find bdev_ctrlr %s\n", name);
9117 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
9118 0 : bdev_nvme_free_set_keys_ctx(ctx);
9119 0 : return -ENODEV;
9120 : }
9121 0 : nctrlr = bdev_nvme_next_ctrlr_unsafe(nbdev_ctrlr, NULL);
9122 0 : if (nctrlr == NULL) {
9123 0 : SPDK_ERRLOG("Could not find any nvme_ctrlrs on bdev_ctrlr %s\n", name);
9124 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
9125 0 : bdev_nvme_free_set_keys_ctx(ctx);
9126 0 : return -ENODEV;
9127 : }
9128 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
9129 :
9130 0 : ctx->nctrlr = nctrlr;
9131 0 : ctx->cb_fn = cb_fn;
9132 0 : ctx->cb_ctx = cb_ctx;
9133 0 : ctx->thread = spdk_get_thread();
9134 :
9135 0 : bdev_nvme_authenticate_ctrlr(ctx);
9136 :
9137 0 : return 0;
9138 : }
9139 :
9140 : void
9141 0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
9142 : {
9143 0 : struct nvme_ns *nvme_ns = io_path->nvme_ns;
9144 0 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
9145 : const struct spdk_nvme_ctrlr_data *cdata;
9146 : const struct spdk_nvme_transport_id *trid;
9147 : const char *adrfam_str;
9148 :
9149 0 : spdk_json_write_object_begin(w);
9150 :
9151 0 : spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
9152 :
9153 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
9154 0 : trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
9155 :
9156 0 : spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
9157 0 : spdk_json_write_named_bool(w, "current", nvme_io_path_is_current(io_path));
9158 0 : spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
9159 0 : spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
9160 :
9161 0 : spdk_json_write_named_object_begin(w, "transport");
9162 0 : spdk_json_write_named_string(w, "trtype", trid->trstring);
9163 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
9164 0 : if (trid->trsvcid[0] != '\0') {
9165 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
9166 : }
9167 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
9168 0 : if (adrfam_str) {
9169 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
9170 : }
9171 0 : spdk_json_write_object_end(w);
9172 :
9173 0 : spdk_json_write_object_end(w);
9174 0 : }
9175 :
9176 : void
9177 0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
9178 : {
9179 : struct discovery_ctx *ctx;
9180 : struct discovery_entry_ctx *entry_ctx;
9181 :
9182 0 : spdk_json_write_array_begin(w);
9183 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
9184 0 : spdk_json_write_object_begin(w);
9185 0 : spdk_json_write_named_string(w, "name", ctx->name);
9186 :
9187 0 : spdk_json_write_named_object_begin(w, "trid");
9188 0 : nvme_bdev_dump_trid_json(&ctx->trid, w);
9189 0 : spdk_json_write_object_end(w);
9190 :
9191 0 : spdk_json_write_named_array_begin(w, "referrals");
9192 0 : TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
9193 0 : spdk_json_write_object_begin(w);
9194 0 : spdk_json_write_named_object_begin(w, "trid");
9195 0 : nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
9196 0 : spdk_json_write_object_end(w);
9197 0 : spdk_json_write_object_end(w);
9198 : }
9199 0 : spdk_json_write_array_end(w);
9200 :
9201 0 : spdk_json_write_object_end(w);
9202 : }
9203 0 : spdk_json_write_array_end(w);
9204 0 : }
9205 :
/* Register the "bdev_nvme" log component named by this file's SPDK_INFOLOG(bdev_nvme, ...) calls. */
SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
9207 :
9208 : static void
9209 0 : bdev_nvme_trace(void)
9210 : {
9211 0 : struct spdk_trace_tpoint_opts opts[] = {
9212 : {
9213 : "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
9214 : OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 1,
9215 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
9216 : },
9217 : {
9218 : "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
9219 : OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 0,
9220 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
9221 : }
9222 : };
9223 :
9224 :
9225 0 : spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
9226 0 : spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
9227 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
9228 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
9229 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
9230 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
9231 0 : }
9232 1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)
|