Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation. All rights reserved.
3 : * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4 : * Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : * Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
6 : */
7 :
8 : #include "spdk/stdinc.h"
9 :
10 : #include "bdev_nvme.h"
11 :
12 : #include "spdk/accel.h"
13 : #include "spdk/config.h"
14 : #include "spdk/endian.h"
15 : #include "spdk/bdev.h"
16 : #include "spdk/json.h"
17 : #include "spdk/keyring.h"
18 : #include "spdk/likely.h"
19 : #include "spdk/nvme.h"
20 : #include "spdk/nvme_ocssd.h"
21 : #include "spdk/nvme_zns.h"
22 : #include "spdk/opal.h"
23 : #include "spdk/thread.h"
24 : #include "spdk/trace.h"
25 : #include "spdk/string.h"
26 : #include "spdk/util.h"
27 : #include "spdk/uuid.h"
28 :
29 : #include "spdk/bdev_module.h"
30 : #include "spdk/log.h"
31 :
32 : #include "spdk_internal/usdt.h"
33 : #include "spdk_internal/trace_defs.h"
34 :
35 : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
36 : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
37 :
38 : #define NSID_STR_LEN 10
39 :
40 : #define SPDK_CONTROLLER_NAME_MAX 512
41 :
42 : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
43 :
44 : struct nvme_bdev_io {
45 : /** array of iovecs to transfer. */
46 : struct iovec *iovs;
47 :
48 : /** Number of iovecs in iovs array. */
49 : int iovcnt;
50 :
51 : /** Current iovec position. */
52 : int iovpos;
53 :
54 : /** Offset in current iovec. */
55 : uint32_t iov_offset;
56 :
57 : /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
58 : * being reset in a reset I/O.
59 : */
60 : struct nvme_io_path *io_path;
61 :
62 : /** Array of iovecs to transfer for the fused command. */
63 : struct iovec *fused_iovs;
64 :
65 : /** Number of iovecs in the fused_iovs array. */
66 : int fused_iovcnt;
67 :
68 : /** Current iovec position. */
69 : int fused_iovpos;
70 :
71 : /** Offset in current iovec. */
72 : uint32_t fused_iov_offset;
73 :
74 : /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
75 : struct spdk_nvme_cpl cpl;
76 :
77 : /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
78 : struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
79 :
80 : /** Tracks whether the first of the fused commands has been submitted */
81 : bool first_fused_submitted;
82 :
83 : /** Tracks whether the first of the fused commands has been completed */
84 : bool first_fused_completed;
85 :
86 : /** Temporary pointer to zone report buffer */
87 : struct spdk_nvme_zns_zone_report *zone_report_buf;
88 :
89 : /** Number of zones that have been copied to the spdk_bdev_zone_info struct */
90 : uint64_t handled_zones;
91 :
92 : /** Expiration value in ticks to retry the current I/O. */
93 : uint64_t retry_ticks;
94 :
95 : /* How many times the current I/O was retried. */
96 : int32_t retry_count;
97 :
98 : /* Current tsc at submit time. */
99 : uint64_t submit_tsc;
100 :
101 : /* Entry used to link this nvme_bdev_io into a retry list */
102 : TAILQ_ENTRY(nvme_bdev_io) retry_link;
103 : };
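
/*
 * Illustrative sketch (not part of the original file): how the iovpos/iov_offset
 * fields above are typically advanced when the NVMe driver asks for the next SGL
 * segment of a queued request. The function name and placement are assumptions
 * made for illustration only; the real SGL callbacks live elsewhere in this file.
 */
static int
example_next_sge(void *ref, void **address, uint32_t *length)
{
	struct nvme_bdev_io *bio = ref;
	struct iovec *iov;

	assert(bio->iovpos < bio->iovcnt);

	iov = &bio->iovs[bio->iovpos];

	*address = (uint8_t *)iov->iov_base + bio->iov_offset;
	*length = iov->iov_len - bio->iov_offset;

	/* Hand out the remainder of the current iovec and move to the next one. */
	bio->iov_offset = 0;
	bio->iovpos++;

	return 0;
}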
104 :
105 : struct nvme_probe_skip_entry {
106 : struct spdk_nvme_transport_id trid;
107 : TAILQ_ENTRY(nvme_probe_skip_entry) tailq;
108 : };
109 : /* All the controllers deleted by users via RPC are skipped by the hotplug monitor. */
110 : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
111 : g_skipped_nvme_ctrlrs);
112 :
113 : #define BDEV_NVME_DEFAULT_DIGESTS (SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256) | \
114 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA384) | \
115 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512))
116 :
117 : #define BDEV_NVME_DEFAULT_DHGROUPS (SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL) | \
118 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_2048) | \
119 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_3072) | \
120 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_4096) | \
121 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_6144) | \
122 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192))
123 :
124 : static struct spdk_bdev_nvme_opts g_opts = {
125 : .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
126 : .timeout_us = 0,
127 : .timeout_admin_us = 0,
128 : .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
129 : .transport_retry_count = 4,
130 : .arbitration_burst = 0,
131 : .low_priority_weight = 0,
132 : .medium_priority_weight = 0,
133 : .high_priority_weight = 0,
134 : .nvme_adminq_poll_period_us = 10000ULL,
135 : .nvme_ioq_poll_period_us = 0,
136 : .io_queue_requests = 0,
137 : .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
138 : .bdev_retry_count = 3,
139 : .transport_ack_timeout = 0,
140 : .ctrlr_loss_timeout_sec = 0,
141 : .reconnect_delay_sec = 0,
142 : .fast_io_fail_timeout_sec = 0,
143 : .disable_auto_failback = false,
144 : .generate_uuids = false,
145 : .transport_tos = 0,
146 : .nvme_error_stat = false,
147 : .io_path_stat = false,
148 : .allow_accel_sequence = false,
149 : .dhchap_digests = BDEV_NVME_DEFAULT_DIGESTS,
150 : .dhchap_dhgroups = BDEV_NVME_DEFAULT_DHGROUPS,
151 : };
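
/*
 * Illustrative sketch (not part of the original file): dhchap_digests and
 * dhchap_dhgroups above are plain bitmasks with one bit per allowed DH-HMAC-CHAP
 * digest or Diffie-Hellman group. A stricter policy that only permits SHA-512
 * with the 8192-bit group would be built with the same SPDK_BIT() macro used by
 * the defaults; the variable names below are for illustration only.
 */
static const uint32_t example_strict_dhchap_digests = SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512);
static const uint32_t example_strict_dhchap_dhgroups = SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192);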
152 :
153 : #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
154 : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL
155 :
156 : static int g_hot_insert_nvme_controller_index = 0;
157 : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
158 : static bool g_nvme_hotplug_enabled = false;
159 : struct spdk_thread *g_bdev_nvme_init_thread;
160 : static struct spdk_poller *g_hotplug_poller;
161 : static struct spdk_poller *g_hotplug_probe_poller;
162 : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
163 :
164 : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
165 : struct nvme_async_probe_ctx *ctx);
166 : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
167 : struct nvme_async_probe_ctx *ctx);
168 : static int bdev_nvme_library_init(void);
169 : static void bdev_nvme_library_fini(void);
170 : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
171 : struct spdk_bdev_io *bdev_io);
172 : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
173 : struct spdk_bdev_io *bdev_io);
174 : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
175 : void *md, uint64_t lba_count, uint64_t lba,
176 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
177 : struct spdk_accel_sequence *seq);
178 : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
179 : void *md, uint64_t lba_count, uint64_t lba);
180 : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
181 : void *md, uint64_t lba_count, uint64_t lba,
182 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
183 : struct spdk_accel_sequence *seq,
184 : union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13);
185 : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
186 : void *md, uint64_t lba_count,
187 : uint64_t zslba, uint32_t flags);
188 : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
189 : void *md, uint64_t lba_count, uint64_t lba,
190 : uint32_t flags);
191 : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
192 : struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
193 : int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
194 : uint32_t flags);
195 : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
196 : uint32_t num_zones, struct spdk_bdev_zone_info *info);
197 : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
198 : enum spdk_bdev_zone_action action);
199 : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
200 : struct nvme_bdev_io *bio,
201 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
202 : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
203 : void *buf, size_t nbytes);
204 : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
205 : void *buf, size_t nbytes, void *md_buf, size_t md_len);
206 : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
207 : struct iovec *iov, int iovcnt, size_t nbytes,
208 : void *md_buf, size_t md_len);
209 : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
210 : struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
211 : static void bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio);
212 : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
213 : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
214 : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
215 : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
216 :
217 : static struct nvme_ns *nvme_ns_alloc(void);
218 : static void nvme_ns_free(struct nvme_ns *ns);
219 :
220 : static int
221 173 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
222 : {
223 173 : return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
224 : }
225 :
226 897 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
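
/*
 * Note (illustrative, not part of the original file): RB_GENERATE_STATIC() above
 * emits the red-black tree accessors (RB_INSERT, RB_FIND, RB_MIN, RB_NEXT, ...)
 * for the per-controller namespace tree, ordered by namespace ID through
 * nvme_ns_cmp(). Inserting a namespace keyed by its ID would look like:
 *
 *	nvme_ns->id = nsid;
 *	RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
 *
 * and lookup by ID is the RB_FIND() call in nvme_ctrlr_get_ns() below.
 */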
227 :
228 : struct spdk_nvme_qpair *
229 1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
230 : {
231 : struct nvme_ctrlr_channel *ctrlr_ch;
232 :
233 1 : assert(ctrlr_io_ch != NULL);
234 :
235 1 : ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
236 :
237 1 : return ctrlr_ch->qpair->qpair;
238 : }
239 :
240 : static int
241 0 : bdev_nvme_get_ctx_size(void)
242 : {
243 0 : return sizeof(struct nvme_bdev_io);
244 : }
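
/*
 * Illustrative sketch (not part of the original file): the size returned above tells
 * the generic bdev layer how many private context bytes to reserve in every
 * spdk_bdev_io it allocates. This module recovers its per-I/O state by casting
 * driver_ctx, the same pattern the retry path uses later in this file:
 */
static inline struct nvme_bdev_io *
example_bdev_io_to_nvme_bdev_io(struct spdk_bdev_io *bdev_io)
{
	/* driver_ctx is the module-private area sized by bdev_nvme_get_ctx_size(). */
	return (struct nvme_bdev_io *)bdev_io->driver_ctx;
}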
245 :
246 : static struct spdk_bdev_module nvme_if = {
247 : .name = "nvme",
248 : .async_fini = true,
249 : .module_init = bdev_nvme_library_init,
250 : .module_fini = bdev_nvme_library_fini,
251 : .config_json = bdev_nvme_config_json,
252 : .get_ctx_size = bdev_nvme_get_ctx_size,
253 :
254 : };
255 1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
256 :
257 : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
258 : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
259 : bool g_bdev_nvme_module_finish;
260 :
261 : struct nvme_bdev_ctrlr *
262 270 : nvme_bdev_ctrlr_get_by_name(const char *name)
263 : {
264 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
265 :
266 270 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
267 148 : if (strcmp(name, nbdev_ctrlr->name) == 0) {
268 148 : break;
269 : }
270 : }
271 :
272 270 : return nbdev_ctrlr;
273 : }
274 :
275 : static struct nvme_ctrlr *
276 58 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
277 : const struct spdk_nvme_transport_id *trid, const char *hostnqn)
278 : {
279 : const struct spdk_nvme_ctrlr_opts *opts;
280 : struct nvme_ctrlr *nvme_ctrlr;
281 :
282 99 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
283 74 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
284 74 : if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0 &&
285 33 : strcmp(hostnqn, opts->hostnqn) == 0) {
286 33 : break;
287 : }
288 : }
289 :
290 58 : return nvme_ctrlr;
291 : }
292 :
293 : struct nvme_ctrlr *
294 0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
295 : uint16_t cntlid)
296 : {
297 : struct nvme_ctrlr *nvme_ctrlr;
298 : const struct spdk_nvme_ctrlr_data *cdata;
299 :
300 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
301 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
302 0 : if (cdata->cntlid == cntlid) {
303 0 : break;
304 : }
305 : }
306 :
307 0 : return nvme_ctrlr;
308 : }
309 :
310 : static struct nvme_bdev *
311 72 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
312 : {
313 : struct nvme_bdev *bdev;
314 :
315 72 : pthread_mutex_lock(&g_bdev_nvme_mutex);
316 106 : TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
317 68 : if (bdev->nsid == nsid) {
318 34 : break;
319 : }
320 : }
321 72 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
322 :
323 72 : return bdev;
324 : }
325 :
326 : struct nvme_ns *
327 140 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
328 : {
329 140 : struct nvme_ns ns;
330 :
331 140 : assert(nsid > 0);
332 :
333 140 : ns.id = nsid;
334 140 : return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
335 : }
336 :
337 : struct nvme_ns *
338 152 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
339 : {
340 152 : return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
341 : }
342 :
343 : struct nvme_ns *
344 63 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
345 : {
346 63 : if (ns == NULL) {
347 0 : return NULL;
348 : }
349 :
350 63 : return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
351 : }
352 :
353 : static struct nvme_ctrlr *
354 51 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
355 : {
356 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
357 51 : struct nvme_ctrlr *nvme_ctrlr = NULL;
358 :
359 51 : pthread_mutex_lock(&g_bdev_nvme_mutex);
360 70 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
361 19 : nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid, hostnqn);
362 19 : if (nvme_ctrlr != NULL) {
363 0 : break;
364 : }
365 : }
366 51 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
367 :
368 51 : return nvme_ctrlr;
369 : }
370 :
371 : struct nvme_ctrlr *
372 71 : nvme_ctrlr_get_by_name(const char *name)
373 : {
374 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
375 71 : struct nvme_ctrlr *nvme_ctrlr = NULL;
376 :
377 71 : if (name == NULL) {
378 0 : return NULL;
379 : }
380 :
381 71 : pthread_mutex_lock(&g_bdev_nvme_mutex);
382 71 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
383 71 : if (nbdev_ctrlr != NULL) {
384 40 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
385 : }
386 71 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
387 :
388 71 : return nvme_ctrlr;
389 : }
390 :
391 : void
392 0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
393 : {
394 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
395 :
396 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
397 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
398 0 : fn(nbdev_ctrlr, ctx);
399 : }
400 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
401 0 : }
402 :
403 : void
404 0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
405 : {
406 : const char *trtype_str;
407 : const char *adrfam_str;
408 :
409 0 : trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
410 0 : if (trtype_str) {
411 0 : spdk_json_write_named_string(w, "trtype", trtype_str);
412 : }
413 :
414 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
415 0 : if (adrfam_str) {
416 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
417 : }
418 :
419 0 : if (trid->traddr[0] != '\0') {
420 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
421 : }
422 :
423 0 : if (trid->trsvcid[0] != '\0') {
424 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
425 : }
426 :
427 0 : if (trid->subnqn[0] != '\0') {
428 0 : spdk_json_write_named_string(w, "subnqn", trid->subnqn);
429 : }
430 0 : }
431 :
432 : static void
433 59 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
434 : struct nvme_ctrlr *nvme_ctrlr)
435 : {
436 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
437 59 : pthread_mutex_lock(&g_bdev_nvme_mutex);
438 :
439 59 : TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
440 59 : if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
441 15 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
442 :
443 15 : return;
444 : }
445 44 : TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
446 :
447 44 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
448 :
449 44 : assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));
450 :
451 44 : free(nbdev_ctrlr->name);
452 44 : free(nbdev_ctrlr);
453 : }
454 :
455 : static void
456 60 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
457 : {
458 : struct nvme_path_id *path_id, *tmp_path;
459 : struct nvme_ns *ns, *tmp_ns;
460 :
461 60 : free(nvme_ctrlr->copied_ana_desc);
462 60 : spdk_free(nvme_ctrlr->ana_log_page);
463 :
464 60 : if (nvme_ctrlr->opal_dev) {
465 0 : spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
466 0 : nvme_ctrlr->opal_dev = NULL;
467 : }
468 :
469 60 : if (nvme_ctrlr->nbdev_ctrlr) {
470 59 : nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
471 : }
472 :
473 60 : RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
474 0 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
475 0 : nvme_ns_free(ns);
476 : }
477 :
478 120 : TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
479 60 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
480 60 : free(path_id);
481 : }
482 :
483 60 : pthread_mutex_destroy(&nvme_ctrlr->mutex);
484 60 : spdk_keyring_put_key(nvme_ctrlr->psk);
485 60 : spdk_keyring_put_key(nvme_ctrlr->dhchap_key);
486 60 : spdk_keyring_put_key(nvme_ctrlr->dhchap_ctrlr_key);
487 60 : free(nvme_ctrlr);
488 :
489 60 : pthread_mutex_lock(&g_bdev_nvme_mutex);
490 60 : if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
491 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
492 0 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
493 0 : spdk_bdev_module_fini_done();
494 0 : return;
495 : }
496 60 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
497 : }
498 :
499 : static int
500 60 : nvme_detach_poller(void *arg)
501 : {
502 60 : struct nvme_ctrlr *nvme_ctrlr = arg;
503 : int rc;
504 :
505 60 : rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
506 60 : if (rc != -EAGAIN) {
507 60 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
508 60 : _nvme_ctrlr_delete(nvme_ctrlr);
509 : }
510 :
511 60 : return SPDK_POLLER_BUSY;
512 : }
513 :
514 : static void
515 60 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
516 : {
517 : int rc;
518 :
519 60 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
520 :
521 : /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
522 60 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
523 :
524 : /* If we got here, the reset/detach poller cannot be active */
525 60 : assert(nvme_ctrlr->reset_detach_poller == NULL);
526 60 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
527 : nvme_ctrlr, 1000);
528 60 : if (nvme_ctrlr->reset_detach_poller == NULL) {
529 0 : SPDK_ERRLOG("Failed to register detach poller\n");
530 0 : goto error;
531 : }
532 :
533 60 : rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
534 60 : if (rc != 0) {
535 0 : SPDK_ERRLOG("Failed to detach the NVMe controller\n");
536 0 : goto error;
537 : }
538 :
539 60 : return;
540 0 : error:
541 : /* We don't have a good way to handle errors here, so just do what we can and delete the
542 : * controller without detaching the underlying NVMe device.
543 : */
544 0 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
545 0 : _nvme_ctrlr_delete(nvme_ctrlr);
546 : }
547 :
548 : static void
549 59 : nvme_ctrlr_unregister_cb(void *io_device)
550 : {
551 59 : struct nvme_ctrlr *nvme_ctrlr = io_device;
552 :
553 59 : nvme_ctrlr_delete(nvme_ctrlr);
554 59 : }
555 :
556 : static void
557 59 : nvme_ctrlr_unregister(void *ctx)
558 : {
559 59 : struct nvme_ctrlr *nvme_ctrlr = ctx;
560 :
561 59 : spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
562 59 : }
563 :
564 : static bool
565 220 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
566 : {
567 220 : if (!nvme_ctrlr->destruct) {
568 106 : return false;
569 : }
570 :
571 114 : if (nvme_ctrlr->ref > 0) {
572 55 : return false;
573 : }
574 :
575 59 : if (nvme_ctrlr->resetting) {
576 0 : return false;
577 : }
578 :
579 59 : if (nvme_ctrlr->ana_log_page_updating) {
580 0 : return false;
581 : }
582 :
583 59 : if (nvme_ctrlr->io_path_cache_clearing) {
584 0 : return false;
585 : }
586 :
587 59 : return true;
588 : }
589 :
590 : static void
591 164 : nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
592 : {
593 164 : pthread_mutex_lock(&nvme_ctrlr->mutex);
594 : SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
595 :
596 164 : assert(nvme_ctrlr->ref > 0);
597 164 : nvme_ctrlr->ref--;
598 :
599 164 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
600 105 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
601 105 : return;
602 : }
603 :
604 59 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
605 :
606 59 : spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
607 : }
608 :
609 : static void
610 161 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
611 : {
612 161 : nbdev_ch->current_io_path = NULL;
613 161 : nbdev_ch->rr_counter = 0;
614 161 : }
615 :
616 : static struct nvme_io_path *
617 8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
618 : {
619 : struct nvme_io_path *io_path;
620 :
621 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
622 15 : if (io_path->nvme_ns == nvme_ns) {
623 7 : break;
624 : }
625 : }
626 :
627 8 : return io_path;
628 : }
629 :
630 : static struct nvme_io_path *
631 35 : nvme_io_path_alloc(void)
632 : {
633 : struct nvme_io_path *io_path;
634 :
635 35 : io_path = calloc(1, sizeof(*io_path));
636 35 : if (io_path == NULL) {
637 0 : SPDK_ERRLOG("Failed to alloc io_path.\n");
638 0 : return NULL;
639 : }
640 :
641 35 : if (g_opts.io_path_stat) {
642 0 : io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
643 0 : if (io_path->stat == NULL) {
644 0 : free(io_path);
645 0 : SPDK_ERRLOG("Failed to alloc io_path stat.\n");
646 0 : return NULL;
647 : }
648 0 : spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
649 : }
650 :
651 35 : return io_path;
652 : }
653 :
654 : static void
655 35 : nvme_io_path_free(struct nvme_io_path *io_path)
656 : {
657 35 : free(io_path->stat);
658 35 : free(io_path);
659 35 : }
660 :
661 : static int
662 35 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
663 : {
664 : struct nvme_io_path *io_path;
665 : struct spdk_io_channel *ch;
666 : struct nvme_ctrlr_channel *ctrlr_ch;
667 : struct nvme_qpair *nvme_qpair;
668 :
669 35 : io_path = nvme_io_path_alloc();
670 35 : if (io_path == NULL) {
671 0 : return -ENOMEM;
672 : }
673 :
674 35 : io_path->nvme_ns = nvme_ns;
675 :
676 35 : ch = spdk_get_io_channel(nvme_ns->ctrlr);
677 35 : if (ch == NULL) {
678 0 : nvme_io_path_free(io_path);
679 0 : SPDK_ERRLOG("Failed to alloc io_channel.\n");
680 0 : return -ENOMEM;
681 : }
682 :
683 35 : ctrlr_ch = spdk_io_channel_get_ctx(ch);
684 :
685 35 : nvme_qpair = ctrlr_ch->qpair;
686 35 : assert(nvme_qpair != NULL);
687 :
688 35 : io_path->qpair = nvme_qpair;
689 35 : TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);
690 :
691 35 : io_path->nbdev_ch = nbdev_ch;
692 35 : STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);
693 :
694 35 : bdev_nvme_clear_current_io_path(nbdev_ch);
695 :
696 35 : return 0;
697 : }
698 :
699 : static void
700 35 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
701 : struct nvme_io_path *io_path)
702 : {
703 : struct nvme_bdev_io *bio;
704 :
705 36 : TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
706 1 : if (bio->io_path == io_path) {
707 1 : bio->io_path = NULL;
708 : }
709 : }
710 35 : }
711 :
712 : static void
713 35 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
714 : {
715 : struct spdk_io_channel *ch;
716 : struct nvme_qpair *nvme_qpair;
717 : struct nvme_ctrlr_channel *ctrlr_ch;
718 : struct nvme_bdev *nbdev;
719 :
720 35 : nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));
721 :
722 : /* Add the statistics to nvme_ns before this path is destroyed. */
723 35 : pthread_mutex_lock(&nbdev->mutex);
724 35 : if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
725 0 : spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
726 : }
727 35 : pthread_mutex_unlock(&nbdev->mutex);
728 :
729 35 : bdev_nvme_clear_current_io_path(nbdev_ch);
730 35 : bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);
731 :
732 35 : STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
733 35 : io_path->nbdev_ch = NULL;
734 :
735 35 : nvme_qpair = io_path->qpair;
736 35 : assert(nvme_qpair != NULL);
737 :
738 35 : ctrlr_ch = nvme_qpair->ctrlr_ch;
739 35 : assert(ctrlr_ch != NULL);
740 :
741 35 : ch = spdk_io_channel_from_ctx(ctrlr_ch);
742 35 : spdk_put_io_channel(ch);
743 :
744 : /* After an io_path is removed, I/Os submitted to it may still complete and update its
745 : * statistics. To avoid a heap-use-after-free in that case, do not free the io_path here;
746 : * free it when the associated qpair is freed. All I/Os submitted to the io_path are
747 : * guaranteed to have completed by the time the associated qpair is freed.
748 : */
749 35 : }
750 :
751 : static void
752 22 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
753 : {
754 : struct nvme_io_path *io_path, *tmp_io_path;
755 :
756 55 : STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
757 33 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
758 : }
759 22 : }
760 :
761 : static int
762 22 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
763 : {
764 22 : struct nvme_bdev_channel *nbdev_ch = ctx_buf;
765 22 : struct nvme_bdev *nbdev = io_device;
766 : struct nvme_ns *nvme_ns;
767 : int rc;
768 :
769 22 : STAILQ_INIT(&nbdev_ch->io_path_list);
770 22 : TAILQ_INIT(&nbdev_ch->retry_io_list);
771 :
772 22 : pthread_mutex_lock(&nbdev->mutex);
773 :
774 22 : nbdev_ch->mp_policy = nbdev->mp_policy;
775 22 : nbdev_ch->mp_selector = nbdev->mp_selector;
776 22 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
777 :
778 55 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
779 33 : rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
780 33 : if (rc != 0) {
781 0 : pthread_mutex_unlock(&nbdev->mutex);
782 :
783 0 : _bdev_nvme_delete_io_paths(nbdev_ch);
784 0 : return rc;
785 : }
786 : }
787 22 : pthread_mutex_unlock(&nbdev->mutex);
788 :
789 22 : return 0;
790 : }
791 :
792 : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
793 : * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
794 : */
795 : static inline void
796 47 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
797 : const struct spdk_nvme_cpl *cpl)
798 : {
799 47 : spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
800 : (uintptr_t)bdev_io);
801 47 : if (cpl) {
802 29 : spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
803 : } else {
804 18 : spdk_bdev_io_complete(bdev_io, status);
805 : }
806 47 : }
807 :
808 : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
809 :
810 : static void
811 22 : bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
812 : {
813 22 : struct nvme_bdev_channel *nbdev_ch = ctx_buf;
814 :
815 22 : bdev_nvme_abort_retry_ios(nbdev_ch);
816 22 : _bdev_nvme_delete_io_paths(nbdev_ch);
817 22 : }
818 :
819 : static inline bool
820 58 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
821 : {
822 58 : switch (io_type) {
823 5 : case SPDK_BDEV_IO_TYPE_RESET:
824 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
825 : case SPDK_BDEV_IO_TYPE_ABORT:
826 5 : return true;
827 53 : default:
828 53 : break;
829 : }
830 :
831 53 : return false;
832 : }
833 :
834 : static inline bool
835 90 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
836 : {
837 90 : if (spdk_unlikely(nvme_ns->ana_state_updating)) {
838 1 : return false;
839 : }
840 :
841 89 : if (spdk_unlikely(nvme_ns->ns == NULL)) {
842 0 : return false;
843 : }
844 :
845 89 : return true;
846 : }
847 :
848 : static inline bool
849 78 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
850 : {
851 78 : if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
852 1 : return false;
853 : }
854 :
855 77 : switch (nvme_ns->ana_state) {
856 68 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
857 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
858 68 : return true;
859 9 : default:
860 9 : break;
861 : }
862 :
863 9 : return false;
864 : }
865 :
866 : static inline bool
867 117 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
868 : {
869 117 : if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
870 20 : return false;
871 : }
872 :
873 97 : if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
874 : SPDK_NVME_QPAIR_FAILURE_NONE)) {
875 2 : return false;
876 : }
877 :
878 95 : if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
879 0 : return false;
880 : }
881 :
882 95 : return true;
883 : }
884 :
885 : static inline bool
886 92 : nvme_io_path_is_available(struct nvme_io_path *io_path)
887 : {
888 92 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
889 14 : return false;
890 : }
891 :
892 78 : if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
893 10 : return false;
894 : }
895 :
896 68 : return true;
897 : }
898 :
899 : static inline bool
900 8 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
901 : {
902 8 : if (nvme_ctrlr->destruct) {
903 0 : return true;
904 : }
905 :
906 8 : if (nvme_ctrlr->fast_io_fail_timedout) {
907 2 : return true;
908 : }
909 :
910 6 : if (nvme_ctrlr->resetting) {
911 4 : if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
912 4 : return false;
913 : } else {
914 0 : return true;
915 : }
916 : }
917 :
918 2 : if (nvme_ctrlr->reconnect_is_delayed) {
919 2 : return false;
920 : }
921 :
922 0 : if (nvme_ctrlr->disabled) {
923 0 : return true;
924 : }
925 :
926 0 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
927 0 : return true;
928 : } else {
929 0 : return false;
930 : }
931 : }
932 :
933 : static bool
934 20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
935 : {
936 20 : if (nvme_ctrlr->destruct) {
937 0 : return false;
938 : }
939 :
940 20 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
941 3 : return false;
942 : }
943 :
944 17 : if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
945 1 : return false;
946 : }
947 :
948 16 : if (nvme_ctrlr->disabled) {
949 0 : return false;
950 : }
951 :
952 16 : return true;
953 : }
954 :
955 : /* Simulate circular linked list. */
956 : static inline struct nvme_io_path *
957 87 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
958 : {
959 : struct nvme_io_path *next_path;
960 :
961 87 : if (prev_path != NULL) {
962 37 : next_path = STAILQ_NEXT(prev_path, stailq);
963 37 : if (next_path != NULL) {
964 14 : return next_path;
965 : }
966 : }
967 :
968 73 : return STAILQ_FIRST(&nbdev_ch->io_path_list);
969 : }
970 :
971 : static struct nvme_io_path *
972 57 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
973 : {
974 57 : struct nvme_io_path *io_path, *start, *non_optimized = NULL;
975 :
976 57 : start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
977 :
978 57 : io_path = start;
979 : do {
980 69 : if (spdk_likely(nvme_io_path_is_available(io_path))) {
981 49 : switch (io_path->nvme_ns->ana_state) {
982 39 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
983 39 : nbdev_ch->current_io_path = io_path;
984 39 : return io_path;
985 10 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
986 10 : if (non_optimized == NULL) {
987 7 : non_optimized = io_path;
988 : }
989 10 : break;
990 0 : default:
991 0 : assert(false);
992 : break;
993 : }
994 : }
995 30 : io_path = nvme_io_path_get_next(nbdev_ch, io_path);
996 30 : } while (io_path != start);
997 :
998 18 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
999 : /* We get here only if there is no optimized path. Cache even a non-optimized
1000 : * path to balance load across multiple non-optimized paths.
1001 : */
1002 1 : nbdev_ch->current_io_path = non_optimized;
1003 : }
1004 :
1005 18 : return non_optimized;
1006 : }
1007 :
1008 : static struct nvme_io_path *
1009 4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
1010 : {
1011 : struct nvme_io_path *io_path;
1012 4 : struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
1013 4 : uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
1014 : uint32_t num_outstanding_reqs;
1015 :
1016 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1017 12 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
1018 : /* The device is currently resetting. */
1019 0 : continue;
1020 : }
1021 :
1022 12 : if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
1023 0 : continue;
1024 : }
1025 :
1026 12 : num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
1027 12 : switch (io_path->nvme_ns->ana_state) {
1028 6 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1029 6 : if (num_outstanding_reqs < opt_min_qd) {
1030 5 : opt_min_qd = num_outstanding_reqs;
1031 5 : optimized = io_path;
1032 : }
1033 6 : break;
1034 3 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1035 3 : if (num_outstanding_reqs < non_opt_min_qd) {
1036 3 : non_opt_min_qd = num_outstanding_reqs;
1037 3 : non_optimized = io_path;
1038 : }
1039 3 : break;
1040 3 : default:
1041 3 : break;
1042 : }
1043 : }
1044 :
1045 : /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
1046 4 : if (optimized != NULL) {
1047 3 : return optimized;
1048 : }
1049 :
1050 1 : return non_optimized;
1051 : }
1052 :
1053 : static inline struct nvme_io_path *
1054 95 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
1055 : {
1056 95 : if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
1057 41 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
1058 31 : return nbdev_ch->current_io_path;
1059 10 : } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1060 10 : if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
1061 3 : return nbdev_ch->current_io_path;
1062 : }
1063 7 : nbdev_ch->rr_counter = 0;
1064 : }
1065 : }
1066 :
1067 61 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
1068 14 : nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1069 57 : return _bdev_nvme_find_io_path(nbdev_ch);
1070 : } else {
1071 4 : return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
1072 : }
1073 : }
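
/*
 * Illustrative sketch (not part of the original file): a typical caller of
 * bdev_nvme_find_io_path(). The helper name is an assumption for illustration;
 * the real submit path lives further down in this file. If no path is usable
 * right now, the caller either queues the I/O for a later retry (when a path may
 * become available) or fails it, mirroring the -ENXIO handling elsewhere here.
 */
static int
example_submit_on_any_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio)
{
	struct nvme_io_path *io_path;

	io_path = bdev_nvme_find_io_path(nbdev_ch);
	if (io_path == NULL) {
		/* No optimized or non-optimized path is currently usable. */
		return -ENXIO;
	}

	bio->io_path = io_path;

	/* ... build and submit the NVMe command on io_path->qpair->qpair ... */
	return 0;
}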
1074 :
1075 : /* Return true if any io_path has an active qpair or a ctrlr that is not failed,
1076 : * or false otherwise.
1077 : *
1078 : * If an io_path has an active qpair but find_io_path() returned NULL, its namespace
1079 : * is currently inaccessible but may become accessible again.
1080 : *
1081 : * If an io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
1082 : * is probably resetting and the reset may succeed. A ctrlr is marked unfailed when
1083 : * a reset starts and marked failed when the reset fails. Hence, an unfailed ctrlr
1084 : * is either working fine or resetting.
1085 : */
1086 : static bool
1087 13 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
1088 : {
1089 : struct nvme_io_path *io_path;
1090 :
1091 15 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1092 13 : if (io_path->nvme_ns->ana_transition_timedout) {
1093 0 : continue;
1094 : }
1095 :
1096 13 : if (nvme_qpair_is_connected(io_path->qpair) ||
1097 8 : !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
1098 11 : return true;
1099 : }
1100 : }
1101 :
1102 2 : return false;
1103 : }
1104 :
1105 : static void
1106 14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
1107 : {
1108 14 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1109 : struct spdk_io_channel *ch;
1110 :
1111 14 : if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
1112 3 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
1113 : } else {
1114 11 : ch = spdk_io_channel_from_ctx(nbdev_ch);
1115 11 : bdev_nvme_submit_request(ch, bdev_io);
1116 : }
1117 14 : }
1118 :
1119 : static int
1120 14 : bdev_nvme_retry_ios(void *arg)
1121 : {
1122 14 : struct nvme_bdev_channel *nbdev_ch = arg;
1123 : struct nvme_bdev_io *bio, *tmp_bio;
1124 : uint64_t now, delay_us;
1125 :
1126 14 : now = spdk_get_ticks();
1127 :
1128 28 : TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
1129 15 : if (bio->retry_ticks > now) {
1130 1 : break;
1131 : }
1132 :
1133 14 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
1134 :
1135 14 : bdev_nvme_retry_io(nbdev_ch, spdk_bdev_io_from_ctx(bio));
1136 : }
1137 :
1138 14 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1139 :
1140 14 : bio = TAILQ_FIRST(&nbdev_ch->retry_io_list);
1141 14 : if (bio != NULL) {
1142 4 : delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
1143 :
1144 4 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1145 : delay_us);
1146 : }
1147 :
1148 14 : return SPDK_POLLER_BUSY;
1149 : }
1150 :
1151 : static void
1152 15 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
1153 : struct nvme_bdev_io *bio, uint64_t delay_ms)
1154 : {
1155 : struct nvme_bdev_io *tmp_bio;
1156 :
1157 15 : bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
1158 :
1159 15 : TAILQ_FOREACH_REVERSE(tmp_bio, &nbdev_ch->retry_io_list, retry_io_head, retry_link) {
1160 1 : if (tmp_bio->retry_ticks <= bio->retry_ticks) {
1161 1 : TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bio, bio,
1162 : retry_link);
1163 1 : return;
1164 : }
1165 : }
1166 :
1167 : /* No earlier I/Os were found. This I/O must be the new head. */
1168 14 : TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bio, retry_link);
1169 :
1170 14 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1171 :
1172 14 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1173 : delay_ms * 1000ULL);
1174 : }
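
/*
 * Worked example (illustrative, values assumed): with spdk_get_ticks_hz() equal to
 * 2300000000 (a 2.3 GHz tick rate), queueing an I/O with delay_ms == 700 above
 * gives retry_ticks = now + 700 * 2300000000 / 1000 = now + 1610000000 ticks,
 * i.e. 700 ms from now. The ordered insertion keeps retry_io_list sorted by
 * retry_ticks, so bdev_nvme_retry_ios() can stop at the first not-yet-due entry.
 */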
1175 :
1176 : static void
1177 36 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
1178 : {
1179 : struct nvme_bdev_io *bio, *tmp_bio;
1180 :
1181 36 : TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
1182 0 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
1183 0 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1184 : }
1185 :
1186 36 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1187 36 : }
1188 :
1189 : static int
1190 6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
1191 : struct nvme_bdev_io *bio_to_abort)
1192 : {
1193 : struct nvme_bdev_io *bio;
1194 :
1195 6 : TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
1196 1 : if (bio == bio_to_abort) {
1197 1 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
1198 1 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1199 1 : return 0;
1200 : }
1201 : }
1202 :
1203 5 : return -ENOENT;
1204 : }
1205 :
1206 : static void
1207 12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
1208 : {
1209 : struct nvme_bdev *nbdev;
1210 : uint16_t sct, sc;
1211 :
1212 12 : assert(spdk_nvme_cpl_is_error(cpl));
1213 :
1214 12 : nbdev = bdev_io->bdev->ctxt;
1215 :
1216 12 : if (nbdev->err_stat == NULL) {
1217 12 : return;
1218 : }
1219 :
1220 0 : sct = cpl->status.sct;
1221 0 : sc = cpl->status.sc;
1222 :
1223 0 : pthread_mutex_lock(&nbdev->mutex);
1224 :
1225 0 : nbdev->err_stat->status_type[sct]++;
1226 0 : switch (sct) {
1227 0 : case SPDK_NVME_SCT_GENERIC:
1228 : case SPDK_NVME_SCT_COMMAND_SPECIFIC:
1229 : case SPDK_NVME_SCT_MEDIA_ERROR:
1230 : case SPDK_NVME_SCT_PATH:
1231 0 : nbdev->err_stat->status[sct][sc]++;
1232 0 : break;
1233 0 : default:
1234 0 : break;
1235 : }
1236 :
1237 0 : pthread_mutex_unlock(&nbdev->mutex);
1238 : }
1239 :
1240 : static inline void
1241 20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
1242 : {
1243 20 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1244 20 : uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
1245 20 : uint32_t blocklen = bdev_io->bdev->blocklen;
1246 : struct spdk_bdev_io_stat *stat;
1247 : uint64_t tsc_diff;
1248 :
1249 20 : if (bio->io_path->stat == NULL) {
1250 20 : return;
1251 : }
1252 :
1253 0 : tsc_diff = spdk_get_ticks() - bio->submit_tsc;
1254 0 : stat = bio->io_path->stat;
1255 :
1256 0 : switch (bdev_io->type) {
1257 0 : case SPDK_BDEV_IO_TYPE_READ:
1258 0 : stat->bytes_read += num_blocks * blocklen;
1259 0 : stat->num_read_ops++;
1260 0 : stat->read_latency_ticks += tsc_diff;
1261 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1262 0 : stat->max_read_latency_ticks = tsc_diff;
1263 : }
1264 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1265 0 : stat->min_read_latency_ticks = tsc_diff;
1266 : }
1267 0 : break;
1268 0 : case SPDK_BDEV_IO_TYPE_WRITE:
1269 0 : stat->bytes_written += num_blocks * blocklen;
1270 0 : stat->num_write_ops++;
1271 0 : stat->write_latency_ticks += tsc_diff;
1272 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1273 0 : stat->max_write_latency_ticks = tsc_diff;
1274 : }
1275 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1276 0 : stat->min_write_latency_ticks = tsc_diff;
1277 : }
1278 0 : break;
1279 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
1280 0 : stat->bytes_unmapped += num_blocks * blocklen;
1281 0 : stat->num_unmap_ops++;
1282 0 : stat->unmap_latency_ticks += tsc_diff;
1283 0 : if (stat->max_unmap_latency_ticks < tsc_diff) {
1284 0 : stat->max_unmap_latency_ticks = tsc_diff;
1285 : }
1286 0 : if (stat->min_unmap_latency_ticks > tsc_diff) {
1287 0 : stat->min_unmap_latency_ticks = tsc_diff;
1288 : }
1289 0 : break;
1290 0 : case SPDK_BDEV_IO_TYPE_ZCOPY:
1291 : /* Track the data in the start phase only */
1292 0 : if (!bdev_io->u.bdev.zcopy.start) {
1293 0 : break;
1294 : }
1295 0 : if (bdev_io->u.bdev.zcopy.populate) {
1296 0 : stat->bytes_read += num_blocks * blocklen;
1297 0 : stat->num_read_ops++;
1298 0 : stat->read_latency_ticks += tsc_diff;
1299 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1300 0 : stat->max_read_latency_ticks = tsc_diff;
1301 : }
1302 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1303 0 : stat->min_read_latency_ticks = tsc_diff;
1304 : }
1305 : } else {
1306 0 : stat->bytes_written += num_blocks * blocklen;
1307 0 : stat->num_write_ops++;
1308 0 : stat->write_latency_ticks += tsc_diff;
1309 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1310 0 : stat->max_write_latency_ticks = tsc_diff;
1311 : }
1312 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1313 0 : stat->min_write_latency_ticks = tsc_diff;
1314 : }
1315 : }
1316 0 : break;
1317 0 : case SPDK_BDEV_IO_TYPE_COPY:
1318 0 : stat->bytes_copied += num_blocks * blocklen;
1319 0 : stat->num_copy_ops++;
1320 0 : stat->copy_latency_ticks += tsc_diff;
1321 0 : if (stat->max_copy_latency_ticks < tsc_diff) {
1322 0 : stat->max_copy_latency_ticks = tsc_diff;
1323 : }
1324 0 : if (stat->min_copy_latency_ticks > tsc_diff) {
1325 0 : stat->min_copy_latency_ticks = tsc_diff;
1326 : }
1327 0 : break;
1328 0 : default:
1329 0 : break;
1330 : }
1331 : }
1332 :
1333 : static bool
1334 7 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
1335 : const struct spdk_nvme_cpl *cpl,
1336 : struct nvme_bdev_channel *nbdev_ch,
1337 : uint64_t *_delay_ms)
1338 : {
1339 7 : struct nvme_io_path *io_path = bio->io_path;
1340 7 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
1341 : const struct spdk_nvme_ctrlr_data *cdata;
1342 :
1343 7 : if (spdk_nvme_cpl_is_path_error(cpl) ||
1344 5 : spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
1345 4 : !nvme_io_path_is_available(io_path) ||
1346 4 : !nvme_ctrlr_is_available(nvme_ctrlr)) {
1347 3 : bdev_nvme_clear_current_io_path(nbdev_ch);
1348 3 : bio->io_path = NULL;
1349 3 : if (spdk_nvme_cpl_is_ana_error(cpl)) {
1350 1 : if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
1351 1 : io_path->nvme_ns->ana_state_updating = true;
1352 : }
1353 : }
1354 3 : if (!any_io_path_may_become_available(nbdev_ch)) {
1355 0 : return false;
1356 : }
1357 3 : *_delay_ms = 0;
1358 : } else {
1359 4 : bio->retry_count++;
1360 :
1361 4 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
1362 :
1363 4 : if (cpl->status.crd != 0) {
1364 1 : *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
1365 : } else {
1366 3 : *_delay_ms = 0;
1367 : }
1368 : }
1369 :
1370 7 : return true;
1371 : }
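
/*
 * Worked example (illustrative): the Command Retry Delay branch above converts the
 * controller's CRDT entry to milliseconds. CRDT values are expressed in units of
 * 100 ms by the NVMe specification, so a completion with status.crd == 1 on a
 * controller reporting cdata->crdt[1] == 5 yields *_delay_ms = 5 * 100 = 500, and
 * the I/O is re-queued 500 ms later via bdev_nvme_queue_retry_io().
 */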
1372 :
1373 : static inline void
1374 32 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
1375 : const struct spdk_nvme_cpl *cpl)
1376 : {
1377 32 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1378 : struct nvme_bdev_channel *nbdev_ch;
1379 32 : uint64_t delay_ms;
1380 :
1381 32 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1382 :
1383 32 : if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
1384 20 : bdev_nvme_update_io_path_stat(bio);
1385 20 : goto complete;
1386 : }
1387 :
1388 : /* Update error counts before deciding whether a retry is needed.
1389 : * Hence, error counts may exceed the number of I/Os that ultimately fail.
1390 : */
1391 12 : bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
1392 :
1393 12 : if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
1394 8 : (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
1395 5 : goto complete;
1396 : }
1397 :
1398 : /* At this point we don't know whether the sequence was successfully executed or not, so we
1399 : * cannot retry the IO */
1400 7 : if (bdev_io->u.bdev.accel_sequence != NULL) {
1401 0 : goto complete;
1402 : }
1403 :
1404 7 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1405 :
1406 7 : if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
1407 7 : bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
1408 7 : return;
1409 : }
1410 :
1411 25 : complete:
1412 25 : bio->retry_count = 0;
1413 25 : bio->submit_tsc = 0;
1414 25 : bdev_io->u.bdev.accel_sequence = NULL;
1415 25 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
1416 : }
1417 :
1418 : static inline void
1419 11 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
1420 : {
1421 11 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1422 : struct nvme_bdev_channel *nbdev_ch;
1423 : enum spdk_bdev_io_status io_status;
1424 :
1425 11 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1426 :
1427 11 : switch (rc) {
1428 1 : case 0:
1429 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1430 1 : break;
1431 0 : case -ENOMEM:
1432 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1433 0 : break;
1434 10 : case -ENXIO:
1435 10 : if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
1436 10 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1437 :
1438 10 : bdev_nvme_clear_current_io_path(nbdev_ch);
1439 10 : bio->io_path = NULL;
1440 :
1441 10 : if (any_io_path_may_become_available(nbdev_ch)) {
1442 8 : bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
1443 8 : return;
1444 : }
1445 : }
1446 :
1447 : /* fallthrough */
1448 : default:
1449 2 : spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
1450 2 : bdev_io->u.bdev.accel_sequence = NULL;
1451 2 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1452 2 : break;
1453 : }
1454 :
1455 3 : bio->retry_count = 0;
1456 3 : bio->submit_tsc = 0;
1457 3 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1458 : }
1459 :
1460 : static inline void
1461 4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
1462 : {
1463 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1464 : enum spdk_bdev_io_status io_status;
1465 :
1466 4 : switch (rc) {
1467 1 : case 0:
1468 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1469 1 : break;
1470 0 : case -ENOMEM:
1471 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1472 0 : break;
1473 3 : case -ENXIO:
1474 : /* fallthrough */
1475 : default:
1476 3 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1477 3 : break;
1478 : }
1479 :
1480 4 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1481 4 : }
1482 :
1483 : static void
1484 3 : bdev_nvme_clear_io_path_caches_done(struct spdk_io_channel_iter *i, int status)
1485 : {
1486 3 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
1487 :
1488 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1489 :
1490 3 : assert(nvme_ctrlr->io_path_cache_clearing == true);
1491 3 : nvme_ctrlr->io_path_cache_clearing = false;
1492 :
1493 3 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
1494 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1495 3 : return;
1496 : }
1497 :
1498 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1499 :
1500 0 : nvme_ctrlr_unregister(nvme_ctrlr);
1501 : }
1502 :
1503 : static void
1504 320 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
1505 : {
1506 : struct nvme_io_path *io_path;
1507 :
1508 459 : TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
1509 139 : if (io_path->nbdev_ch == NULL) {
1510 64 : continue;
1511 : }
1512 75 : bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
1513 : }
1514 320 : }
1515 :
1516 : static void
1517 1 : bdev_nvme_clear_io_path_cache(struct spdk_io_channel_iter *i)
1518 : {
1519 1 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
1520 1 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
1521 :
1522 1 : assert(ctrlr_ch->qpair != NULL);
1523 :
1524 1 : _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
1525 :
1526 1 : spdk_for_each_channel_continue(i, 0);
1527 1 : }
1528 :
1529 : static void
1530 3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
1531 : {
1532 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1533 3 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
1534 : nvme_ctrlr->io_path_cache_clearing) {
1535 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1536 0 : return;
1537 : }
1538 :
1539 3 : nvme_ctrlr->io_path_cache_clearing = true;
1540 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1541 :
1542 3 : spdk_for_each_channel(nvme_ctrlr,
1543 : bdev_nvme_clear_io_path_cache,
1544 : NULL,
1545 : bdev_nvme_clear_io_path_caches_done);
1546 : }
1547 :
1548 : static struct nvme_qpair *
1549 99 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
1550 : {
1551 : struct nvme_qpair *nvme_qpair;
1552 :
1553 108 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1554 108 : if (nvme_qpair->qpair == qpair) {
1555 99 : break;
1556 : }
1557 : }
1558 :
1559 99 : return nvme_qpair;
1560 : }
1561 :
1562 : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
1563 :
1564 : static void
1565 99 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
1566 : {
1567 99 : struct nvme_poll_group *group = poll_group_ctx;
1568 : struct nvme_qpair *nvme_qpair;
1569 : struct nvme_ctrlr_channel *ctrlr_ch;
1570 : int status;
1571 :
1572 99 : nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
1573 99 : if (nvme_qpair == NULL) {
1574 0 : return;
1575 : }
1576 :
1577 99 : if (nvme_qpair->qpair != NULL) {
1578 99 : spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
1579 99 : nvme_qpair->qpair = NULL;
1580 : }
1581 :
1582 99 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1583 :
1584 99 : ctrlr_ch = nvme_qpair->ctrlr_ch;
1585 :
1586 99 : if (ctrlr_ch != NULL) {
1587 56 : if (ctrlr_ch->reset_iter != NULL) {
1588 : /* We are in a full reset sequence. */
1589 52 : if (ctrlr_ch->connect_poller != NULL) {
1590 : /* qpair failed to connect. Abort the reset sequence. */
1591 0 : SPDK_DEBUGLOG(bdev_nvme, "qpair %p failed to connect. Aborting the ctrlr reset sequence.\n",
1592 : qpair);
1593 0 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
1594 0 : status = -1;
1595 : } else {
1596 : /* qpair finished disconnecting. Just move to the next ctrlr_channel. */
1597 52 : SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
1598 : qpair);
1599 52 : status = 0;
1600 : }
1601 52 : spdk_for_each_channel_continue(ctrlr_ch->reset_iter, status);
1602 52 : ctrlr_ch->reset_iter = NULL;
1603 : } else {
1604 : /* qpair was disconnected unexpectedly. Reset controller for recovery. */
1605 4 : SPDK_NOTICELOG("qpair %p was disconnected and freed. reset controller.\n", qpair);
1606 4 : bdev_nvme_failover_ctrlr(nvme_qpair->ctrlr);
1607 : }
1608 : } else {
1609 : /* In this case, ctrlr_channel is already deleted. */
1610 43 : SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed. delete nvme_qpair.\n", qpair);
1611 43 : nvme_qpair_delete(nvme_qpair);
1612 : }
1613 : }
1614 :
1615 : static void
1616 0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
1617 : {
1618 : struct nvme_qpair *nvme_qpair;
1619 :
1620 0 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1621 0 : if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
1622 0 : continue;
1623 : }
1624 :
1625 0 : if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
1626 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1627 0 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1628 : }
1629 : }
1630 0 : }
1631 :
1632 : static int
1633 1025 : bdev_nvme_poll(void *arg)
1634 : {
1635 1025 : struct nvme_poll_group *group = arg;
1636 : int64_t num_completions;
1637 :
1638 1025 : if (group->collect_spin_stat && group->start_ticks == 0) {
1639 0 : group->start_ticks = spdk_get_ticks();
1640 : }
1641 :
1642 1025 : num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
1643 : bdev_nvme_disconnected_qpair_cb);
1644 1025 : if (group->collect_spin_stat) {
1645 0 : if (num_completions > 0) {
1646 0 : if (group->end_ticks != 0) {
1647 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
1648 0 : group->end_ticks = 0;
1649 : }
1650 0 : group->start_ticks = 0;
1651 : } else {
1652 0 : group->end_ticks = spdk_get_ticks();
1653 : }
1654 : }
1655 :
1656 1025 : if (spdk_unlikely(num_completions < 0)) {
1657 0 : bdev_nvme_check_io_qpairs(group);
1658 : }
1659 :
1660 1025 : return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
1661 : }
1662 :
1663 : static int bdev_nvme_poll_adminq(void *arg);
1664 :
1665 : static void
1666 100 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
1667 : {
1668 100 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
1669 :
1670 100 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
1671 : nvme_ctrlr, new_period_us);
1672 100 : }
1673 :
1674 : static int
1675 146 : bdev_nvme_poll_adminq(void *arg)
1676 : {
1677 : int32_t rc;
1678 146 : struct nvme_ctrlr *nvme_ctrlr = arg;
1679 : nvme_ctrlr_disconnected_cb disconnected_cb;
1680 :
1681 146 : assert(nvme_ctrlr != NULL);
1682 :
1683 146 : rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
1684 146 : if (rc < 0) {
1685 53 : disconnected_cb = nvme_ctrlr->disconnected_cb;
1686 53 : nvme_ctrlr->disconnected_cb = NULL;
1687 :
1688 53 : if (disconnected_cb != NULL) {
1689 50 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
1690 : g_opts.nvme_adminq_poll_period_us);
1691 50 : disconnected_cb(nvme_ctrlr);
1692 : } else {
1693 3 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
1694 : }
1695 93 : } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
1696 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1697 0 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
1698 : }
1699 :
1700 146 : return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
1701 : }
1702 :
1703 : static void
1704 37 : nvme_bdev_free(void *io_device)
1705 : {
1706 37 : struct nvme_bdev *nvme_disk = io_device;
1707 :
1708 37 : pthread_mutex_destroy(&nvme_disk->mutex);
1709 37 : free(nvme_disk->disk.name);
1710 37 : free(nvme_disk->err_stat);
1711 37 : free(nvme_disk);
1712 37 : }
1713 :
1714 : static int
1715 36 : bdev_nvme_destruct(void *ctx)
1716 : {
1717 36 : struct nvme_bdev *nvme_disk = ctx;
1718 : struct nvme_ns *nvme_ns, *tmp_nvme_ns;
1719 :
1720 : SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);
1721 :
1722 73 : TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
1723 37 : pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
1724 :
1725 37 : nvme_ns->bdev = NULL;
1726 :
1727 37 : assert(nvme_ns->id > 0);
1728 :
1729 37 : if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
1730 0 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1731 :
1732 0 : nvme_ctrlr_release(nvme_ns->ctrlr);
1733 0 : nvme_ns_free(nvme_ns);
1734 : } else {
1735 37 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1736 : }
1737 : }
1738 :
1739 36 : pthread_mutex_lock(&g_bdev_nvme_mutex);
1740 36 : TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
1741 36 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
1742 :
1743 36 : spdk_io_device_unregister(nvme_disk, nvme_bdev_free);
1744 :
1745 36 : return 0;
1746 : }
1747 :
1748 : static int
1749 100 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
1750 : {
1751 : struct nvme_ctrlr *nvme_ctrlr;
1752 100 : struct spdk_nvme_io_qpair_opts opts;
1753 : struct spdk_nvme_qpair *qpair;
1754 : int rc;
1755 :
1756 100 : nvme_ctrlr = nvme_qpair->ctrlr;
1757 :
1758 100 : spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1759 100 : opts.delay_cmd_submit = g_opts.delay_cmd_submit;
1760 100 : opts.create_only = true;
1761 100 : opts.async_mode = true;
1762 100 : opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
1763 100 : g_opts.io_queue_requests = opts.io_queue_requests;
1764 :
1765 100 : qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1766 100 : if (qpair == NULL) {
1767 0 : return -1;
1768 : }
1769 :
1770 : SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
1771 : spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
1772 :
1773 100 : assert(nvme_qpair->group != NULL);
1774 :
1775 100 : rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
1776 100 : if (rc != 0) {
1777 0 : SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n");
1778 0 : goto err;
1779 : }
1780 :
1781 100 : rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
1782 100 : if (rc != 0) {
1783 0 : SPDK_ERRLOG("Unable to connect I/O qpair.\n");
1784 0 : goto err;
1785 : }
1786 :
1787 100 : nvme_qpair->qpair = qpair;
1788 :
1789 100 : if (!g_opts.disable_auto_failback) {
1790 71 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1791 : }
1792 :
1793 100 : return 0;
1794 :
1795 0 : err:
1796 0 : spdk_nvme_ctrlr_free_io_qpair(qpair);
1797 :
1798 0 : return rc;
1799 : }
1800 :
1801 : static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);
1802 :
1803 : static void
1804 82 : bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
1805 : {
1806 82 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
1807 82 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
1808 82 : int rc = 0;
1809 : struct nvme_bdev_io *bio;
1810 :
1811 82 : if (spdk_io_channel_iter_get_ctx(i) != NULL) {
1812 35 : rc = -1;
1813 : }
1814 :
1815 86 : while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
1816 4 : bio = TAILQ_FIRST(&ctrlr_ch->pending_resets);
1817 4 : TAILQ_REMOVE(&ctrlr_ch->pending_resets, bio, retry_link);
1818 :
1819 4 : bdev_nvme_reset_io_continue(bio, rc);
1820 : }
1821 :
1822 82 : spdk_for_each_channel_continue(i, 0);
1823 82 : }
1824 :
1825 : /* This function marks the current trid as failed by storing the current tick count
1826 : * and then, if an alternate trid exists, makes it the controller's active trid.
1827 : *
1828 : * A true return value asks the caller to disconnect the current trid now so that
1829 : * connecting the next trid can be attempted.
1830 : */
1831 : static bool
1832 36 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
1833 : {
1834 : struct nvme_path_id *path_id, *next_path;
1835 : int rc __attribute__((unused));
1836 :
1837 36 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
1838 36 : assert(path_id);
1839 36 : assert(path_id == nvme_ctrlr->active_path_id);
1840 36 : next_path = TAILQ_NEXT(path_id, link);
1841 :
1842 : /* Update the last failed time. A trid is considered failed if its last
1843 : * failed time is non-zero.
1844 : */
1845 36 : path_id->last_failed_tsc = spdk_get_ticks();
1846 :
1847 36 : if (next_path == NULL) {
1848 : /* There is no alternate trid within a controller. */
1849 25 : return false;
1850 : }
1851 :
1852 11 : if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
1853 : /* Connect is not retried in a controller reset sequence. Connecting
1854 : * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
1855 : */
1856 3 : return false;
1857 : }
1858 :
1859 8 : assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
1860 :
1861 8 : SPDK_NOTICELOG("Start failover from %s:%s to %s:%s\n", path_id->trid.traddr,
1862 : path_id->trid.trsvcid, next_path->trid.traddr, next_path->trid.trsvcid);
1863 :
1864 8 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
1865 8 : nvme_ctrlr->active_path_id = next_path;
1866 8 : rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
1867 8 : assert(rc == 0);
1868 8 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
1869 8 : if (!remove) {
1870 : /* Move the old trid to the end of the list and use the new one.
1871 : * This allows round-robin through multiple connections.
1872 : */
1873 6 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
1874 : } else {
1875 2 : free(path_id);
1876 : }
1877 :
1878 8 : if (start || next_path->last_failed_tsc == 0) {
1879 : /* bdev_nvme_failover_ctrlr() was just called, or the next trid has not failed
1880 : * or been used yet. Try the next trid now.
1881 : */
1882 7 : return true;
1883 : }
1884 :
1885 1 : if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
1886 1 : nvme_ctrlr->opts.reconnect_delay_sec) {
1887 : /* Enough backoff passed since the next trid failed. Try the next trid now. */
1888 0 : return true;
1889 : }
1890 :
1891 : /* The next trid will be tried after reconnect_delay_sec seconds. */
1892 1 : return false;
1893 : }
1894 :
1895 : static bool
1896 68 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
1897 : {
1898 : int32_t elapsed;
1899 :
1900 68 : if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
1901 36 : nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
1902 42 : return false;
1903 : }
1904 :
1905 26 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
1906 26 : if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
1907 6 : return true;
1908 : } else {
1909 20 : return false;
1910 : }
1911 : }
1912 :
1913 : static bool
1914 12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
1915 : {
1916 : uint32_t elapsed;
1917 :
1918 12 : if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
1919 8 : return false;
1920 : }
1921 :
1922 4 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
1923 4 : if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
1924 2 : return true;
1925 : } else {
1926 2 : return false;
1927 : }
1928 : }
1929 :
1930 : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
1931 :
1932 : static void
1933 51 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
1934 : {
1935 : int rc;
1936 :
1937 51 : rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
1938 51 : if (rc != 0) {
1939 : /* Disconnect fails if ctrlr is already resetting or removed. In this case,
1940 : * fail the reset sequence immediately.
1941 : */
1942 1 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
1943 1 : return;
1944 : }
1945 :
1946 : /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling the adminq.
1947 : * Set the callback here to execute the specified operation once the ctrlr is actually disconnected.
1948 : */
1949 50 : assert(nvme_ctrlr->disconnected_cb == NULL);
1950 50 : nvme_ctrlr->disconnected_cb = cb_fn;
1951 :
1952 : /* During disconnection, reduce the period to poll adminq more often. */
1953 50 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
1954 : }
1955 :
1956 : enum bdev_nvme_op_after_reset {
1957 : OP_NONE,
1958 : OP_COMPLETE_PENDING_DESTRUCT,
1959 : OP_DESTRUCT,
1960 : OP_DELAYED_RECONNECT,
1961 : OP_FAILOVER,
1962 : };
1963 :
1964 : typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
1965 :
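     : /* Decide what to do after a reset (or disable) sequence finishes: complete a pending
     :  * destruct, start a deferred failover, do nothing on success or when no reconnect delay
     :  * is configured, destruct the ctrlr once the ctrlr loss timeout expires, or otherwise
     :  * schedule a delayed reconnect (marking fast I/O failure if that timeout has expired).
     :  */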
1966 : static _bdev_nvme_op_after_reset
1967 50 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
1968 : {
1969 50 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
1970 : /* Complete pending destruct after reset completes. */
1971 0 : return OP_COMPLETE_PENDING_DESTRUCT;
1972 50 : } else if (nvme_ctrlr->pending_failover) {
1973 3 : nvme_ctrlr->pending_failover = false;
1974 3 : nvme_ctrlr->reset_start_tsc = 0;
1975 3 : return OP_FAILOVER;
1976 47 : } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
1977 33 : nvme_ctrlr->reset_start_tsc = 0;
1978 33 : return OP_NONE;
1979 14 : } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
1980 2 : return OP_DESTRUCT;
1981 : } else {
1982 12 : if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
1983 2 : nvme_ctrlr->fast_io_fail_timedout = true;
1984 : }
1985 12 : return OP_DELAYED_RECONNECT;
1986 : }
1987 : }
1988 :
1989 : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
1990 : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
1991 :
1992 : static int
1993 9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
1994 : {
1995 9 : struct nvme_ctrlr *nvme_ctrlr = ctx;
1996 :
1997 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
1998 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1999 :
2000 9 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2001 :
2002 9 : if (!nvme_ctrlr->reconnect_is_delayed) {
2003 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2004 0 : return SPDK_POLLER_BUSY;
2005 : }
2006 :
2007 9 : nvme_ctrlr->reconnect_is_delayed = false;
2008 :
2009 9 : if (nvme_ctrlr->destruct) {
2010 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2011 0 : return SPDK_POLLER_BUSY;
2012 : }
2013 :
2014 9 : assert(nvme_ctrlr->resetting == false);
2015 9 : nvme_ctrlr->resetting = true;
2016 :
2017 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2018 :
2019 9 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2020 :
2021 9 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2022 9 : return SPDK_POLLER_BUSY;
2023 : }
2024 :
2025 : static void
2026 12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
2027 : {
2028 12 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2029 :
2030 12 : assert(nvme_ctrlr->reconnect_is_delayed == false);
2031 12 : nvme_ctrlr->reconnect_is_delayed = true;
2032 :
2033 12 : assert(nvme_ctrlr->reconnect_delay_timer == NULL);
2034 12 : nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
2035 : nvme_ctrlr,
2036 : nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
2037 12 : }
2038 :
2039 : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
2040 :
2041 : static void
2042 48 : _bdev_nvme_reset_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
2043 : {
2044 48 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2045 48 : bool success = spdk_io_channel_iter_get_ctx(i) == NULL;
2046 48 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2047 48 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2048 : enum bdev_nvme_op_after_reset op_after_reset;
2049 :
2050 48 : assert(nvme_ctrlr->thread == spdk_get_thread());
2051 :
2052 48 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2053 48 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2054 :
2055 48 : if (!success) {
2056 21 : SPDK_ERRLOG("Resetting controller failed.\n");
2057 : } else {
2058 27 : SPDK_NOTICELOG("Resetting controller successful.\n");
2059 : }
2060 :
2061 48 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2062 48 : nvme_ctrlr->resetting = false;
2063 48 : nvme_ctrlr->dont_retry = false;
2064 48 : nvme_ctrlr->in_failover = false;
2065 :
2066 48 : op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
2067 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2068 :
2069 : /* Delay callbacks when the next operation is a failover. */
2070 48 : if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
2071 10 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
2072 : }
2073 :
2074 48 : switch (op_after_reset) {
2075 0 : case OP_COMPLETE_PENDING_DESTRUCT:
2076 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2077 0 : break;
2078 2 : case OP_DESTRUCT:
2079 2 : bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
2080 2 : remove_discovery_entry(nvme_ctrlr);
2081 2 : break;
2082 12 : case OP_DELAYED_RECONNECT:
2083 12 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
2084 12 : break;
2085 3 : case OP_FAILOVER:
2086 3 : nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
2087 3 : nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
2088 3 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
2089 3 : break;
2090 31 : default:
2091 31 : break;
2092 : }
2093 48 : }
2094 :
2095 : static void
2096 50 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
2097 : {
2098 50 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2099 50 : if (!success) {
2100 : /* Connecting the active trid failed. If an alternate trid exists, make it
2101 : * the active trid.
2102 : */
2103 23 : if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
2104 : /* The next alternate trid exists and is ready to try. Try it now. */
2105 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2106 :
2107 2 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2108 2 : return;
2109 : }
2110 :
2111 : /* We get here if there is no alternate trid, or if the next trid exists but
2112 : * is not yet ready to try. The active trid will be retried after reconnect_delay_sec
2113 : * seconds if it is non-zero, or at the next reset call otherwise.
2114 : */
2115 : } else {
2116 : /* Connecting the active trid succeeded. Clear the last failed time, since a
2117 : * non-zero last failed time marks the trid as failed.
2118 : */
2119 27 : nvme_ctrlr->active_path_id->last_failed_tsc = 0;
2120 : }
2121 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2122 :
2123 : /* Make sure we clear any pending resets before returning. */
2124 48 : spdk_for_each_channel(nvme_ctrlr,
2125 : bdev_nvme_complete_pending_resets,
2126 : success ? NULL : (void *)0x1,
2127 : _bdev_nvme_reset_ctrlr_complete);
2128 : }
2129 :
2130 : static void
2131 0 : bdev_nvme_reset_create_qpairs_failed(struct spdk_io_channel_iter *i, int status)
2132 : {
2133 0 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2134 :
2135 0 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2136 0 : }
2137 :
2138 : static void
2139 62 : bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
2140 : {
2141 62 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
2142 62 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
2143 : struct nvme_qpair *nvme_qpair;
2144 :
2145 62 : nvme_qpair = ctrlr_ch->qpair;
2146 62 : assert(nvme_qpair != NULL);
2147 :
2148 62 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
2149 :
2150 62 : if (nvme_qpair->qpair != NULL) {
2151 52 : if (nvme_qpair->ctrlr->dont_retry) {
2152 39 : spdk_nvme_qpair_set_abort_dnr(nvme_qpair->qpair, true);
2153 : }
2154 52 : spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
2155 :
2156 : /* The current full reset sequence will move to the next
2157 : * ctrlr_channel after the qpair is actually disconnected.
2158 : */
2159 52 : assert(ctrlr_ch->reset_iter == NULL);
2160 52 : ctrlr_ch->reset_iter = i;
2161 : } else {
2162 10 : spdk_for_each_channel_continue(i, 0);
2163 : }
2164 62 : }
2165 :
2166 : static void
2167 27 : bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
2168 : {
2169 27 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2170 :
2171 27 : if (status == 0) {
2172 27 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
2173 : } else {
2174 : /* Delete the added qpairs and quiesce the ctrlr to make its state clean. */
2175 0 : spdk_for_each_channel(nvme_ctrlr,
2176 : bdev_nvme_reset_destroy_qpair,
2177 : NULL,
2178 : bdev_nvme_reset_create_qpairs_failed);
2179 : }
2180 27 : }
2181 :
2182 : static int
2183 43 : bdev_nvme_reset_check_qpair_connected(void *ctx)
2184 : {
2185 43 : struct nvme_ctrlr_channel *ctrlr_ch = ctx;
2186 :
2187 43 : if (ctrlr_ch->reset_iter == NULL) {
2188 : /* The qpair already failed to connect and the reset sequence is being aborted. */
2189 0 : assert(ctrlr_ch->connect_poller == NULL);
2190 0 : assert(ctrlr_ch->qpair->qpair == NULL);
2191 0 : return SPDK_POLLER_BUSY;
2192 : }
2193 :
2194 43 : assert(ctrlr_ch->qpair->qpair != NULL);
2195 :
2196 43 : if (!spdk_nvme_qpair_is_connected(ctrlr_ch->qpair->qpair)) {
2197 0 : return SPDK_POLLER_BUSY;
2198 : }
2199 :
2200 43 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
2201 :
2202 : /* The qpair has finished connecting. Move to the next ctrlr_channel. */
2203 43 : spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
2204 43 : ctrlr_ch->reset_iter = NULL;
2205 :
2206 43 : if (!g_opts.disable_auto_failback) {
2207 30 : _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
2208 : }
2209 :
2210 43 : return SPDK_POLLER_BUSY;
2211 : }
2212 :
2213 : static void
2214 43 : bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
2215 : {
2216 43 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
2217 43 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
2218 : int rc;
2219 :
2220 43 : rc = bdev_nvme_create_qpair(ctrlr_ch->qpair);
2221 43 : if (rc == 0) {
2222 43 : ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
2223 : ctrlr_ch, 0);
2224 :
2225 : /* The current full reset sequence will move to the next
2226 : * ctrlr_channel after the qpair is actually connected.
2227 : */
2228 43 : assert(ctrlr_ch->reset_iter == NULL);
2229 43 : ctrlr_ch->reset_iter = i;
2230 : } else {
2231 0 : spdk_for_each_channel_continue(i, rc);
2232 : }
2233 43 : }
2234 :
2235 : static void
2236 27 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
2237 : {
2238 27 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
2239 : struct nvme_ns *nvme_ns;
2240 :
2241 27 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
2242 39 : nvme_ns != NULL;
2243 12 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
2244 12 : if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
2245 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
2246 : /* NS can be added again. Just nullify nvme_ns->ns. */
2247 1 : nvme_ns->ns = NULL;
2248 : }
2249 : }
2250 27 : }
2251 :
2252 :
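     : /* Poller that drives the asynchronous reconnect started by bdev_nvme_reconnect_ctrlr().
     :  * Once the ctrlr loss timeout expires the ctrlr is failed so that the reconnect aborts.
     :  * On success, namespaces are re-checked and all I/O qpairs are recreated; on failure,
     :  * the reset sequence completes as failed.
     :  */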
2253 : static int
2254 49 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
2255 : {
2256 49 : struct nvme_ctrlr *nvme_ctrlr = arg;
2257 49 : int rc = -ETIMEDOUT;
2258 :
2259 49 : if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
2260 : /* Mark the ctrlr as failed. The next call to
2261 : * spdk_nvme_ctrlr_reconnect_poll_async() will then
2262 : * do the necessary cleanup and return failure.
2263 : */
2264 2 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
2265 : }
2266 :
2267 49 : rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
2268 49 : if (rc == -EAGAIN) {
2269 0 : return SPDK_POLLER_BUSY;
2270 : }
2271 :
2272 49 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
2273 49 : if (rc == 0) {
2274 27 : nvme_ctrlr_check_namespaces(nvme_ctrlr);
2275 :
2276 : /* Recreate all of the I/O queue pairs */
2277 27 : spdk_for_each_channel(nvme_ctrlr,
2278 : bdev_nvme_reset_create_qpair,
2279 : NULL,
2280 : bdev_nvme_reset_create_qpairs_done);
2281 : } else {
2282 22 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2283 : }
2284 49 : return SPDK_POLLER_BUSY;
2285 : }
2286 :
2287 : static void
2288 49 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2289 : {
2290 49 : spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
2291 :
2292 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
2293 49 : assert(nvme_ctrlr->reset_detach_poller == NULL);
2294 49 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
2295 : nvme_ctrlr, 0);
2296 49 : }
2297 :
2298 : static void
2299 36 : bdev_nvme_reset_destroy_qpair_done(struct spdk_io_channel_iter *i, int status)
2300 : {
2301 36 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2302 :
2303 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
2304 36 : assert(status == 0);
2305 :
2306 36 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2307 0 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2308 : } else {
2309 36 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2310 : }
2311 36 : }
2312 :
2313 : static void
2314 36 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2315 : {
2316 36 : spdk_for_each_channel(nvme_ctrlr,
2317 : bdev_nvme_reset_destroy_qpair,
2318 : NULL,
2319 : bdev_nvme_reset_destroy_qpair_done);
2320 36 : }
2321 :
2322 : static void
2323 3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
2324 : {
2325 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2326 :
2327 3 : assert(nvme_ctrlr->resetting == true);
2328 3 : assert(nvme_ctrlr->thread == spdk_get_thread());
2329 :
2330 3 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2331 :
2332 3 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2333 :
2334 3 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2335 3 : }
2336 :
2337 : static void
2338 36 : _bdev_nvme_reset_ctrlr(void *ctx)
2339 : {
2340 36 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2341 :
2342 36 : assert(nvme_ctrlr->resetting == true);
2343 36 : assert(nvme_ctrlr->thread == spdk_get_thread());
2344 :
2345 36 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2346 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
2347 : } else {
2348 36 : bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
2349 : }
2350 36 : }
2351 :
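     : /* Entry point for a full controller reset. Returns -ENXIO if the ctrlr is being
     :  * destructed, -EBUSY if a reset is already in progress, and -EALREADY if the ctrlr is
     :  * disabled. Otherwise the work is sent as a message to the ctrlr's thread: either an
     :  * immediate reconnect (if a delayed reconnect was pending) or the full reset sequence.
     :  */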
2352 : static int
2353 34 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2354 : {
2355 : spdk_msg_fn msg_fn;
2356 :
2357 34 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2358 34 : if (nvme_ctrlr->destruct) {
2359 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2360 3 : return -ENXIO;
2361 : }
2362 :
2363 31 : if (nvme_ctrlr->resetting) {
2364 6 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2365 6 : SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
2366 6 : return -EBUSY;
2367 : }
2368 :
2369 25 : if (nvme_ctrlr->disabled) {
2370 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2371 0 : SPDK_NOTICELOG("Unable to perform reset. Controller is disabled.\n");
2372 0 : return -EALREADY;
2373 : }
2374 :
2375 25 : nvme_ctrlr->resetting = true;
2376 25 : nvme_ctrlr->dont_retry = true;
2377 :
2378 25 : if (nvme_ctrlr->reconnect_is_delayed) {
2379 1 : SPDK_DEBUGLOG(bdev_nvme, "Reconnect is already scheduled.\n");
2380 1 : msg_fn = bdev_nvme_reconnect_ctrlr_now;
2381 1 : nvme_ctrlr->reconnect_is_delayed = false;
2382 : } else {
2383 24 : msg_fn = _bdev_nvme_reset_ctrlr;
2384 24 : assert(nvme_ctrlr->reset_start_tsc == 0);
2385 : }
2386 :
2387 25 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2388 :
2389 25 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2390 :
2391 25 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2392 25 : return 0;
2393 : }
2394 :
2395 : static int
2396 3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2397 : {
2398 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2399 3 : if (nvme_ctrlr->destruct) {
2400 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2401 0 : return -ENXIO;
2402 : }
2403 :
2404 3 : if (nvme_ctrlr->resetting) {
2405 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2406 0 : return -EBUSY;
2407 : }
2408 :
2409 3 : if (!nvme_ctrlr->disabled) {
2410 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2411 1 : return -EALREADY;
2412 : }
2413 :
2414 2 : nvme_ctrlr->disabled = false;
2415 2 : nvme_ctrlr->resetting = true;
2416 :
2417 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2418 :
2419 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2420 :
2421 2 : spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
2422 2 : return 0;
2423 : }
2424 :
2425 : static void
2426 2 : _bdev_nvme_disable_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
2427 : {
2428 2 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2429 2 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2430 2 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2431 : enum bdev_nvme_op_after_reset op_after_disable;
2432 :
2433 2 : assert(nvme_ctrlr->thread == spdk_get_thread());
2434 :
2435 2 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2436 2 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2437 :
2438 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2439 :
2440 2 : nvme_ctrlr->resetting = false;
2441 2 : nvme_ctrlr->dont_retry = false;
2442 :
2443 2 : op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
2444 :
2445 2 : nvme_ctrlr->disabled = true;
2446 2 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2447 :
2448 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2449 :
2450 2 : if (ctrlr_op_cb_fn) {
2451 0 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
2452 : }
2453 :
2454 2 : switch (op_after_disable) {
2455 0 : case OP_COMPLETE_PENDING_DESTRUCT:
2456 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2457 0 : break;
2458 2 : default:
2459 2 : break;
2460 : }
2461 :
2462 2 : }
2463 :
2464 : static void
2465 2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
2466 : {
2467 : /* Make sure we clear any pending resets before returning. */
2468 2 : spdk_for_each_channel(nvme_ctrlr,
2469 : bdev_nvme_complete_pending_resets,
2470 : NULL,
2471 : _bdev_nvme_disable_ctrlr_complete);
2472 2 : }
2473 :
2474 : static void
2475 1 : bdev_nvme_disable_destroy_qpairs_done(struct spdk_io_channel_iter *i, int status)
2476 : {
2477 1 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2478 :
2479 1 : assert(status == 0);
2480 :
2481 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2482 0 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2483 : } else {
2484 1 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
2485 : }
2486 1 : }
2487 :
2488 : static void
2489 1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2490 : {
2491 1 : spdk_for_each_channel(nvme_ctrlr,
2492 : bdev_nvme_reset_destroy_qpair,
2493 : NULL,
2494 : bdev_nvme_disable_destroy_qpairs_done);
2495 1 : }
2496 :
2497 : static void
2498 1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
2499 : {
2500 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2501 :
2502 1 : assert(nvme_ctrlr->resetting == true);
2503 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2504 :
2505 1 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2506 :
2507 1 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2508 1 : }
2509 :
2510 : static void
2511 1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
2512 : {
2513 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2514 :
2515 1 : assert(nvme_ctrlr->resetting == true);
2516 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2517 :
2518 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2519 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
2520 : } else {
2521 1 : bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
2522 : }
2523 1 : }
2524 :
2525 : static int
2526 5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2527 : {
2528 : spdk_msg_fn msg_fn;
2529 :
2530 5 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2531 5 : if (nvme_ctrlr->destruct) {
2532 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2533 1 : return -ENXIO;
2534 : }
2535 :
2536 4 : if (nvme_ctrlr->resetting) {
2537 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2538 1 : return -EBUSY;
2539 : }
2540 :
2541 3 : if (nvme_ctrlr->disabled) {
2542 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2543 1 : return -EALREADY;
2544 : }
2545 :
2546 2 : nvme_ctrlr->resetting = true;
2547 2 : nvme_ctrlr->dont_retry = true;
2548 :
2549 2 : if (nvme_ctrlr->reconnect_is_delayed) {
2550 1 : msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
2551 1 : nvme_ctrlr->reconnect_is_delayed = false;
2552 : } else {
2553 1 : msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
2554 : }
2555 :
2556 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2557 :
2558 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2559 :
2560 2 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2561 2 : return 0;
2562 : }
2563 :
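     : /* Dispatch a reset, enable, or disable operation on a single nvme_ctrlr. If the
     :  * operation is started successfully (rc == 0), the callback is saved and invoked
     :  * when the operation completes.
     :  */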
2564 : static int
2565 16 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2566 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2567 : {
2568 : int rc;
2569 :
2570 16 : switch (op) {
2571 15 : case NVME_CTRLR_OP_RESET:
2572 15 : rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
2573 15 : break;
2574 0 : case NVME_CTRLR_OP_ENABLE:
2575 0 : rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
2576 0 : break;
2577 0 : case NVME_CTRLR_OP_DISABLE:
2578 0 : rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
2579 0 : break;
2580 1 : default:
2581 1 : rc = -EINVAL;
2582 1 : break;
2583 : }
2584 :
2585 16 : if (rc == 0) {
2586 9 : assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
2587 9 : assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
2588 9 : nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
2589 9 : nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
2590 : }
2591 16 : return rc;
2592 : }
2593 :
2594 : struct nvme_ctrlr_op_rpc_ctx {
2595 : struct nvme_ctrlr *nvme_ctrlr;
2596 : struct spdk_thread *orig_thread;
2597 : enum nvme_ctrlr_op op;
2598 : int rc;
2599 : bdev_nvme_ctrlr_op_cb cb_fn;
2600 : void *cb_arg;
2601 : };
2602 :
2603 : static void
2604 4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
2605 : {
2606 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2607 :
2608 4 : assert(ctx != NULL);
2609 4 : assert(ctx->cb_fn != NULL);
2610 :
2611 4 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2612 :
2613 4 : free(ctx);
2614 4 : }
2615 :
2616 : static void
2617 4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
2618 : {
2619 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2620 :
2621 4 : ctx->rc = rc;
2622 :
2623 4 : spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
2624 4 : }
2625 :
2626 : void
2627 4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2628 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2629 : {
2630 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2631 : int rc;
2632 :
2633 4 : assert(cb_fn != NULL);
2634 :
2635 4 : ctx = calloc(1, sizeof(*ctx));
2636 4 : if (ctx == NULL) {
2637 0 : SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2638 0 : cb_fn(cb_arg, -ENOMEM);
2639 0 : return;
2640 : }
2641 :
2642 4 : ctx->orig_thread = spdk_get_thread();
2643 4 : ctx->cb_fn = cb_fn;
2644 4 : ctx->cb_arg = cb_arg;
2645 :
2646 4 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
2647 4 : if (rc == 0) {
2648 1 : return;
2649 3 : } else if (rc == -EALREADY) {
2650 0 : rc = 0;
2651 : }
2652 :
2653 3 : nvme_ctrlr_op_rpc_complete(ctx, rc);
2654 : }
2655 :
2656 : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
2657 :
2658 : static void
2659 2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
2660 : {
2661 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2662 : struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
2663 : int rc;
2664 :
2665 2 : prev_nvme_ctrlr = ctx->nvme_ctrlr;
2666 2 : ctx->nvme_ctrlr = NULL;
2667 :
2668 2 : if (ctx->rc != 0) {
2669 0 : goto complete;
2670 : }
2671 :
2672 2 : next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
2673 2 : if (next_nvme_ctrlr == NULL) {
2674 1 : goto complete;
2675 : }
2676 :
2677 1 : rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2678 1 : if (rc == 0) {
2679 1 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2680 1 : return;
2681 0 : } else if (rc == -EALREADY) {
2682 0 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2683 0 : rc = 0;
2684 : }
2685 :
2686 0 : ctx->rc = rc;
2687 :
2688 1 : complete:
2689 1 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2690 1 : free(ctx);
2691 : }
2692 :
2693 : static void
2694 2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
2695 : {
2696 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2697 :
2698 2 : ctx->rc = rc;
2699 :
2700 2 : spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
2701 2 : }
2702 :
2703 : void
2704 1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
2705 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2706 : {
2707 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2708 : struct nvme_ctrlr *nvme_ctrlr;
2709 : int rc;
2710 :
2711 1 : assert(cb_fn != NULL);
2712 :
2713 1 : ctx = calloc(1, sizeof(*ctx));
2714 1 : if (ctx == NULL) {
2715 0 : SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2716 0 : cb_fn(cb_arg, -ENOMEM);
2717 0 : return;
2718 : }
2719 :
2720 1 : ctx->orig_thread = spdk_get_thread();
2721 1 : ctx->op = op;
2722 1 : ctx->cb_fn = cb_fn;
2723 1 : ctx->cb_arg = cb_arg;
2724 :
2725 1 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
2726 1 : assert(nvme_ctrlr != NULL);
2727 :
2728 1 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2729 1 : if (rc == 0) {
2730 1 : ctx->nvme_ctrlr = nvme_ctrlr;
2731 1 : return;
2732 0 : } else if (rc == -EALREADY) {
2733 0 : ctx->nvme_ctrlr = nvme_ctrlr;
2734 0 : rc = 0;
2735 : }
2736 :
2737 0 : nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
2738 : }
2739 :
2740 : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
2741 :
2742 : static void
2743 7 : _bdev_nvme_reset_io_complete(struct spdk_io_channel_iter *i, int status)
2744 : {
2745 7 : struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
2746 : enum spdk_bdev_io_status io_status;
2747 :
2748 7 : if (bio->cpl.cdw0 == 0) {
2749 5 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
2750 : } else {
2751 2 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
2752 : }
2753 :
2754 7 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
2755 7 : }
2756 :
2757 : static void
2758 14 : bdev_nvme_abort_bdev_channel(struct spdk_io_channel_iter *i)
2759 : {
2760 14 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
2761 14 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
2762 :
2763 14 : bdev_nvme_abort_retry_ios(nbdev_ch);
2764 :
2765 14 : spdk_for_each_channel_continue(i, 0);
2766 14 : }
2767 :
2768 : static void
2769 7 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
2770 : {
2771 7 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
2772 7 : struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
2773 :
2774 : /* Abort all I/Os that are queued for retry. */
2775 7 : spdk_for_each_channel(nbdev,
2776 : bdev_nvme_abort_bdev_channel,
2777 : bio,
2778 : _bdev_nvme_reset_io_complete);
2779 7 : }
2780 :
2781 : static void
2782 10 : _bdev_nvme_reset_io_continue(void *ctx)
2783 : {
2784 10 : struct nvme_bdev_io *bio = ctx;
2785 : struct nvme_io_path *prev_io_path, *next_io_path;
2786 : int rc;
2787 :
2788 10 : prev_io_path = bio->io_path;
2789 10 : bio->io_path = NULL;
2790 :
2791 10 : if (bio->cpl.cdw0 != 0) {
2792 2 : goto complete;
2793 : }
2794 :
2795 8 : next_io_path = STAILQ_NEXT(prev_io_path, stailq);
2796 8 : if (next_io_path == NULL) {
2797 5 : goto complete;
2798 : }
2799 :
2800 3 : rc = _bdev_nvme_reset_io(next_io_path, bio);
2801 3 : if (rc == 0) {
2802 3 : return;
2803 : }
2804 :
2805 0 : bio->cpl.cdw0 = 1;
2806 :
2807 7 : complete:
2808 7 : bdev_nvme_reset_io_complete(bio);
2809 : }
2810 :
2811 : static void
2812 10 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
2813 : {
2814 10 : struct nvme_bdev_io *bio = cb_arg;
2815 10 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
2816 :
2817 10 : bio->cpl.cdw0 = (rc == 0) ? 0 : 1;
2818 :
2819 10 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
2820 10 : }
2821 :
2822 : static int
2823 10 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
2824 : {
2825 : struct nvme_ctrlr_channel *ctrlr_ch;
2826 : int rc;
2827 :
2828 10 : rc = nvme_ctrlr_op(io_path->qpair->ctrlr, NVME_CTRLR_OP_RESET,
2829 : bdev_nvme_reset_io_continue, bio);
2830 10 : if (rc != 0 && rc != -EBUSY) {
2831 0 : return rc;
2832 : }
2833 :
2834 10 : assert(bio->io_path == NULL);
2835 10 : bio->io_path = io_path;
2836 :
2837 10 : if (rc == -EBUSY) {
2838 4 : ctrlr_ch = io_path->qpair->ctrlr_ch;
2839 4 : assert(ctrlr_ch != NULL);
2840 : /*
2841 : * The reset call is queued only if it comes from the app framework. This is intentional, so
2842 : * that we do not interfere with the app framework's reset strategy, i.e. we defer to the
2843 : * upper layer. If it is already in the middle of a reset, we do not try to schedule another one.
2844 : */
2845 4 : TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bio, retry_link);
2846 : }
2847 :
2848 10 : return 0;
2849 : }
2850 :
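     : /* Handle a bdev reset request. The nvme_ctrlrs behind the channel's I/O paths are
     :  * reset one at a time; bdev_nvme_reset_io_continue() advances to the next path and
     :  * completes the bdev I/O after the last path or on the first failure.
     :  */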
2851 : static void
2852 7 : bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio)
2853 : {
2854 : struct nvme_io_path *io_path;
2855 : int rc;
2856 :
2857 7 : bio->cpl.cdw0 = 0;
2858 :
2859 : /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
2860 7 : io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
2861 7 : assert(io_path != NULL);
2862 :
2863 7 : rc = _bdev_nvme_reset_io(io_path, bio);
2864 7 : if (rc != 0) {
2865 : /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
2866 0 : rc = (rc == -EALREADY) ? 0 : rc;
2867 :
2868 0 : bdev_nvme_reset_io_continue(bio, rc);
2869 : }
2870 7 : }
2871 :
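     : /* Prepare a failover to the next trid. Called with nvme_ctrlr->mutex held (hence the
     :  * _unsafe suffix). Returns 0 if the caller should start the reset/reconnect, or a
     :  * negative errno if the failover was rejected or deferred to an ongoing operation.
     :  */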
2872 : static int
2873 18 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
2874 : {
2875 18 : if (nvme_ctrlr->destruct) {
2876 : /* Don't bother resetting if the controller is in the process of being destructed. */
2877 2 : return -ENXIO;
2878 : }
2879 :
2880 16 : if (nvme_ctrlr->resetting) {
2881 3 : if (!nvme_ctrlr->in_failover) {
2882 3 : SPDK_NOTICELOG("Reset is already in progress. Defer failover until reset completes.\n");
2883 :
2884 : /* Defer failover until reset completes. */
2885 3 : nvme_ctrlr->pending_failover = true;
2886 3 : return -EINPROGRESS;
2887 : } else {
2888 0 : SPDK_NOTICELOG("Unable to perform failover, already in progress.\n");
2889 0 : return -EBUSY;
2890 : }
2891 : }
2892 :
2893 13 : bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
2894 :
2895 13 : if (nvme_ctrlr->reconnect_is_delayed) {
2896 1 : SPDK_NOTICELOG("Reconnect is already scheduled.\n");
2897 :
2898 : /* We rely on the next reconnect for the failover. */
2899 1 : return -EALREADY;
2900 : }
2901 :
2902 12 : if (nvme_ctrlr->disabled) {
2903 0 : SPDK_NOTICELOG("Controller is disabled.\n");
2904 :
2905 : /* We rely on the enablement for the failover. */
2906 0 : return -EALREADY;
2907 : }
2908 :
2909 12 : nvme_ctrlr->resetting = true;
2910 12 : nvme_ctrlr->in_failover = true;
2911 :
2912 12 : assert(nvme_ctrlr->reset_start_tsc == 0);
2913 12 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2914 :
2915 12 : return 0;
2916 : }
2917 :
2918 : static int
2919 16 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2920 : {
2921 : int rc;
2922 :
2923 16 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2924 16 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
2925 16 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2926 :
2927 16 : if (rc == 0) {
2928 11 : spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
2929 5 : } else if (rc == -EALREADY) {
2930 0 : rc = 0;
2931 : }
2932 :
2933 16 : return rc;
2934 : }
2935 :
2936 : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
2937 : uint64_t num_blocks);
2938 :
2939 : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
2940 : uint64_t num_blocks);
2941 :
2942 : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
2943 : uint64_t src_offset_blocks,
2944 : uint64_t num_blocks);
2945 :
2946 : static void
2947 1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
2948 : bool success)
2949 : {
2950 1 : struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
2951 : int ret;
2952 :
2953 1 : if (!success) {
2954 0 : ret = -EINVAL;
2955 0 : goto exit;
2956 : }
2957 :
2958 1 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
2959 0 : ret = -ENXIO;
2960 0 : goto exit;
2961 : }
2962 :
2963 1 : ret = bdev_nvme_readv(bio,
2964 : bdev_io->u.bdev.iovs,
2965 : bdev_io->u.bdev.iovcnt,
2966 : bdev_io->u.bdev.md_buf,
2967 : bdev_io->u.bdev.num_blocks,
2968 : bdev_io->u.bdev.offset_blocks,
2969 : bdev_io->u.bdev.dif_check_flags,
2970 : bdev_io->u.bdev.memory_domain,
2971 : bdev_io->u.bdev.memory_domain_ctx,
2972 : bdev_io->u.bdev.accel_sequence);
2973 :
2974 1 : exit:
2975 1 : if (spdk_unlikely(ret != 0)) {
2976 0 : bdev_nvme_io_complete(bio, ret);
2977 : }
2978 1 : }
2979 :
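     : /* Dispatch a bdev I/O to the corresponding NVMe operation based on bdev_io->type.
     :  * Requests that manage their own completion (reset, flush, admin passthrough, abort)
     :  * return early; for all others, a non-zero return code is reported through
     :  * bdev_nvme_io_complete().
     :  */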
2980 : static inline void
2981 51 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
2982 : {
2983 51 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
2984 51 : struct spdk_bdev *bdev = bdev_io->bdev;
2985 : struct nvme_bdev_io *nbdev_io_to_abort;
2986 51 : int rc = 0;
2987 :
2988 51 : switch (bdev_io->type) {
2989 3 : case SPDK_BDEV_IO_TYPE_READ:
2990 3 : if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
2991 :
2992 2 : rc = bdev_nvme_readv(nbdev_io,
2993 : bdev_io->u.bdev.iovs,
2994 : bdev_io->u.bdev.iovcnt,
2995 : bdev_io->u.bdev.md_buf,
2996 : bdev_io->u.bdev.num_blocks,
2997 : bdev_io->u.bdev.offset_blocks,
2998 : bdev_io->u.bdev.dif_check_flags,
2999 : bdev_io->u.bdev.memory_domain,
3000 : bdev_io->u.bdev.memory_domain_ctx,
3001 : bdev_io->u.bdev.accel_sequence);
3002 : } else {
3003 1 : spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
3004 1 : bdev_io->u.bdev.num_blocks * bdev->blocklen);
3005 1 : rc = 0;
3006 : }
3007 3 : break;
3008 25 : case SPDK_BDEV_IO_TYPE_WRITE:
3009 25 : rc = bdev_nvme_writev(nbdev_io,
3010 : bdev_io->u.bdev.iovs,
3011 : bdev_io->u.bdev.iovcnt,
3012 : bdev_io->u.bdev.md_buf,
3013 : bdev_io->u.bdev.num_blocks,
3014 : bdev_io->u.bdev.offset_blocks,
3015 : bdev_io->u.bdev.dif_check_flags,
3016 : bdev_io->u.bdev.memory_domain,
3017 : bdev_io->u.bdev.memory_domain_ctx,
3018 : bdev_io->u.bdev.accel_sequence,
3019 : bdev_io->u.bdev.nvme_cdw12,
3020 : bdev_io->u.bdev.nvme_cdw13);
3021 25 : break;
3022 1 : case SPDK_BDEV_IO_TYPE_COMPARE:
3023 1 : rc = bdev_nvme_comparev(nbdev_io,
3024 : bdev_io->u.bdev.iovs,
3025 : bdev_io->u.bdev.iovcnt,
3026 : bdev_io->u.bdev.md_buf,
3027 : bdev_io->u.bdev.num_blocks,
3028 : bdev_io->u.bdev.offset_blocks,
3029 : bdev_io->u.bdev.dif_check_flags);
3030 1 : break;
3031 2 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3032 2 : rc = bdev_nvme_comparev_and_writev(nbdev_io,
3033 : bdev_io->u.bdev.iovs,
3034 : bdev_io->u.bdev.iovcnt,
3035 : bdev_io->u.bdev.fused_iovs,
3036 : bdev_io->u.bdev.fused_iovcnt,
3037 : bdev_io->u.bdev.md_buf,
3038 : bdev_io->u.bdev.num_blocks,
3039 : bdev_io->u.bdev.offset_blocks,
3040 : bdev_io->u.bdev.dif_check_flags);
3041 2 : break;
3042 1 : case SPDK_BDEV_IO_TYPE_UNMAP:
3043 1 : rc = bdev_nvme_unmap(nbdev_io,
3044 : bdev_io->u.bdev.offset_blocks,
3045 : bdev_io->u.bdev.num_blocks);
3046 1 : break;
3047 0 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3048 0 : rc = bdev_nvme_write_zeroes(nbdev_io,
3049 : bdev_io->u.bdev.offset_blocks,
3050 : bdev_io->u.bdev.num_blocks);
3051 0 : break;
3052 7 : case SPDK_BDEV_IO_TYPE_RESET:
3053 7 : nbdev_io->io_path = NULL;
3054 7 : bdev_nvme_reset_io(nbdev_ch, nbdev_io);
3055 7 : return;
3056 :
3057 1 : case SPDK_BDEV_IO_TYPE_FLUSH:
3058 1 : bdev_nvme_io_complete(nbdev_io, 0);
3059 1 : return;
3060 :
3061 0 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3062 0 : rc = bdev_nvme_zone_appendv(nbdev_io,
3063 : bdev_io->u.bdev.iovs,
3064 : bdev_io->u.bdev.iovcnt,
3065 : bdev_io->u.bdev.md_buf,
3066 : bdev_io->u.bdev.num_blocks,
3067 : bdev_io->u.bdev.offset_blocks,
3068 : bdev_io->u.bdev.dif_check_flags);
3069 0 : break;
3070 0 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3071 0 : rc = bdev_nvme_get_zone_info(nbdev_io,
3072 : bdev_io->u.zone_mgmt.zone_id,
3073 : bdev_io->u.zone_mgmt.num_zones,
3074 0 : bdev_io->u.zone_mgmt.buf);
3075 0 : break;
3076 0 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3077 0 : rc = bdev_nvme_zone_management(nbdev_io,
3078 : bdev_io->u.zone_mgmt.zone_id,
3079 : bdev_io->u.zone_mgmt.zone_action);
3080 0 : break;
3081 5 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3082 5 : nbdev_io->io_path = NULL;
3083 5 : bdev_nvme_admin_passthru(nbdev_ch,
3084 : nbdev_io,
3085 : &bdev_io->u.nvme_passthru.cmd,
3086 : bdev_io->u.nvme_passthru.buf,
3087 : bdev_io->u.nvme_passthru.nbytes);
3088 5 : return;
3089 :
3090 0 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3091 0 : rc = bdev_nvme_io_passthru(nbdev_io,
3092 : &bdev_io->u.nvme_passthru.cmd,
3093 : bdev_io->u.nvme_passthru.buf,
3094 : bdev_io->u.nvme_passthru.nbytes);
3095 0 : break;
3096 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3097 0 : rc = bdev_nvme_io_passthru_md(nbdev_io,
3098 : &bdev_io->u.nvme_passthru.cmd,
3099 : bdev_io->u.nvme_passthru.buf,
3100 : bdev_io->u.nvme_passthru.nbytes,
3101 : bdev_io->u.nvme_passthru.md_buf,
3102 : bdev_io->u.nvme_passthru.md_len);
3103 0 : break;
3104 0 : case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
3105 0 : rc = bdev_nvme_iov_passthru_md(nbdev_io,
3106 : &bdev_io->u.nvme_passthru.cmd,
3107 : bdev_io->u.nvme_passthru.iovs,
3108 : bdev_io->u.nvme_passthru.iovcnt,
3109 : bdev_io->u.nvme_passthru.nbytes,
3110 : bdev_io->u.nvme_passthru.md_buf,
3111 : bdev_io->u.nvme_passthru.md_len);
3112 0 : break;
3113 6 : case SPDK_BDEV_IO_TYPE_ABORT:
3114 6 : nbdev_io->io_path = NULL;
3115 6 : nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
3116 6 : bdev_nvme_abort(nbdev_ch,
3117 : nbdev_io,
3118 : nbdev_io_to_abort);
3119 6 : return;
3120 :
3121 0 : case SPDK_BDEV_IO_TYPE_COPY:
3122 0 : rc = bdev_nvme_copy(nbdev_io,
3123 : bdev_io->u.bdev.offset_blocks,
3124 : bdev_io->u.bdev.copy.src_offset_blocks,
3125 : bdev_io->u.bdev.num_blocks);
3126 0 : break;
3127 0 : default:
3128 0 : rc = -EINVAL;
3129 0 : break;
3130 : }
3131 :
3132 32 : if (spdk_unlikely(rc != 0)) {
3133 0 : bdev_nvme_io_complete(nbdev_io, rc);
3134 : }
3135 : }
3136 :
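     : /* bdev module entry point for I/O submission. Records the submission timestamp,
     :  * selects an I/O path for the channel, and fails non-admin I/O with -ENXIO if no
     :  * path is currently available.
     :  */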
3137 : static void
3138 58 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
3139 : {
3140 58 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
3141 58 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3142 :
3143 58 : if (spdk_likely(nbdev_io->submit_tsc == 0)) {
3144 58 : nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
3145 : } else {
3146 : /* There are cases where submit_tsc != 0, i.e. a retried I/O.
3147 : * We need to update submit_tsc here.
3148 : */
3149 0 : nbdev_io->submit_tsc = spdk_get_ticks();
3150 : }
3151 :
3152 58 : spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
3153 58 : nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
3154 58 : if (spdk_unlikely(!nbdev_io->io_path)) {
3155 11 : if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
3156 10 : bdev_nvme_io_complete(nbdev_io, -ENXIO);
3157 10 : return;
3158 : }
3159 :
3160 : /* Admin commands do not use the optimal I/O path.
3161 : * Simply fall through even if it is not found.
3162 : */
3163 : }
3164 :
3165 48 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
3166 : }
3167 :
3168 : static bool
3169 0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
3170 : {
3171 0 : struct nvme_bdev *nbdev = ctx;
3172 : struct nvme_ns *nvme_ns;
3173 : struct spdk_nvme_ns *ns;
3174 : struct spdk_nvme_ctrlr *ctrlr;
3175 : const struct spdk_nvme_ctrlr_data *cdata;
3176 :
3177 0 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
3178 0 : assert(nvme_ns != NULL);
3179 0 : ns = nvme_ns->ns;
3180 0 : if (ns == NULL) {
3181 0 : return false;
3182 : }
3183 :
3184 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3185 :
3186 0 : switch (io_type) {
3187 0 : case SPDK_BDEV_IO_TYPE_READ:
3188 : case SPDK_BDEV_IO_TYPE_WRITE:
3189 : case SPDK_BDEV_IO_TYPE_RESET:
3190 : case SPDK_BDEV_IO_TYPE_FLUSH:
3191 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3192 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3193 : case SPDK_BDEV_IO_TYPE_ABORT:
3194 0 : return true;
3195 :
3196 0 : case SPDK_BDEV_IO_TYPE_COMPARE:
3197 0 : return spdk_nvme_ns_supports_compare(ns);
3198 :
3199 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3200 0 : return spdk_nvme_ns_get_md_size(ns) ? true : false;
3201 :
3202 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
3203 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3204 0 : return cdata->oncs.dsm;
3205 :
3206 0 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3207 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3208 0 : return cdata->oncs.write_zeroes;
3209 :
3210 0 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3211 0 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
3212 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
3213 0 : return true;
3214 : }
3215 0 : return false;
3216 :
3217 0 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3218 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3219 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
3220 :
3221 0 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3222 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
3223 0 : spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
3224 :
3225 0 : case SPDK_BDEV_IO_TYPE_COPY:
3226 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3227 0 : return cdata->oncs.copy;
3228 :
3229 0 : default:
3230 0 : return false;
3231 : }
3232 : }
3233 :
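     : /* Create the per-channel nvme_qpair: allocate it, attach it to this thread's poll
     :  * group, create and connect the underlying I/O qpair (unless the ctrlr is disabled),
     :  * and take a reference on the nvme_ctrlr.
     :  */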
3234 : static int
3235 57 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
3236 : {
3237 : struct nvme_qpair *nvme_qpair;
3238 : struct spdk_io_channel *pg_ch;
3239 : int rc;
3240 :
3241 57 : nvme_qpair = calloc(1, sizeof(*nvme_qpair));
3242 57 : if (!nvme_qpair) {
3243 0 : SPDK_ERRLOG("Failed to alloc nvme_qpair.\n");
3244 0 : return -1;
3245 : }
3246 :
3247 57 : TAILQ_INIT(&nvme_qpair->io_path_list);
3248 :
3249 57 : nvme_qpair->ctrlr = nvme_ctrlr;
3250 57 : nvme_qpair->ctrlr_ch = ctrlr_ch;
3251 :
3252 57 : pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
3253 57 : if (!pg_ch) {
3254 0 : free(nvme_qpair);
3255 0 : return -1;
3256 : }
3257 :
3258 57 : nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
3259 :
3260 : #ifdef SPDK_CONFIG_VTUNE
3261 : nvme_qpair->group->collect_spin_stat = true;
3262 : #else
3263 57 : nvme_qpair->group->collect_spin_stat = false;
3264 : #endif
3265 :
3266 57 : if (!nvme_ctrlr->disabled) {
3267 : /* If an nvme_ctrlr is disabled, don't try to create a qpair for it. The qpair
3268 : * will be created when the ctrlr is enabled.
3269 : */
3270 57 : rc = bdev_nvme_create_qpair(nvme_qpair);
3271 57 : if (rc != 0) {
3272 : /* nvme_ctrlr can't create IO qpair if connection is down.
3273 : * If reconnect_delay_sec is non-zero, creating IO qpair is retried
3274 : * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
3275 : * submitted IO will be queued until IO qpair is successfully created.
3276 : *
3277 : * Hence, if both are satisfied, ignore the failure.
3278 : */
3279 0 : if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
3280 0 : spdk_put_io_channel(pg_ch);
3281 0 : free(nvme_qpair);
3282 0 : return rc;
3283 : }
3284 : }
3285 : }
3286 :
3287 57 : TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3288 :
3289 57 : ctrlr_ch->qpair = nvme_qpair;
3290 :
3291 57 : pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
3292 57 : nvme_qpair->ctrlr->ref++;
3293 57 : pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);
3294 :
3295 57 : return 0;
3296 : }
3297 :
3298 : static int
3299 57 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3300 : {
3301 57 : struct nvme_ctrlr *nvme_ctrlr = io_device;
3302 57 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3303 :
3304 57 : TAILQ_INIT(&ctrlr_ch->pending_resets);
3305 :
3306 57 : return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
3307 : }
3308 :
3309 : static void
3310 57 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
3311 : {
3312 : struct nvme_io_path *io_path, *next;
3313 :
3314 57 : assert(nvme_qpair->group != NULL);
3315 :
3316 92 : TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
3317 35 : TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
3318 35 : nvme_io_path_free(io_path);
3319 : }
3320 :
3321 57 : TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3322 :
3323 57 : spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
3324 :
3325 57 : nvme_ctrlr_release(nvme_qpair->ctrlr);
3326 :
3327 57 : free(nvme_qpair);
3328 57 : }
3329 :
3330 : static void
3331 57 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3332 : {
3333 57 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3334 : struct nvme_qpair *nvme_qpair;
3335 :
3336 57 : nvme_qpair = ctrlr_ch->qpair;
3337 57 : assert(nvme_qpair != NULL);
3338 :
3339 57 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
3340 :
3341 57 : if (nvme_qpair->qpair != NULL) {
3342 43 : if (ctrlr_ch->reset_iter == NULL) {
3343 43 : spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
3344 : } else {
3345 : /* Skip the current ctrlr_channel in a full reset sequence because
3346 : * it is being deleted now. The qpair is already being disconnected, so
3347 : * we do not have to restart disconnecting it.
3348 : */
3349 0 : spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
3350 : }
3351 :
3352 : /* We cannot release a reference to the poll group now.
3353 : * The qpair may be disconnected asynchronously later.
3354 : * We need to poll it until it is actually disconnected.
3355 : * Just detach the qpair from the deleting ctrlr_channel.
3356 : */
3357 43 : nvme_qpair->ctrlr_ch = NULL;
3358 : } else {
3359 14 : assert(ctrlr_ch->reset_iter == NULL);
3360 :
3361 14 : nvme_qpair_delete(nvme_qpair);
3362 : }
3363 57 : }
3364 :
3365 : static inline struct spdk_io_channel *
3366 0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
3367 : {
3368 0 : if (spdk_unlikely(!group->accel_channel)) {
3369 0 : group->accel_channel = spdk_accel_get_io_channel();
3370 0 : if (!group->accel_channel) {
3371 0 : SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
3372 : group);
3373 0 : return NULL;
3374 : }
3375 : }
3376 :
3377 0 : return group->accel_channel;
3378 : }
3379 :
3380 : static void
3381 0 : bdev_nvme_submit_accel_crc32c(void *ctx, uint32_t *dst, struct iovec *iov,
3382 : uint32_t iov_cnt, uint32_t seed,
3383 : spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
3384 : {
3385 : struct spdk_io_channel *accel_ch;
3386 0 : struct nvme_poll_group *group = ctx;
3387 : int rc;
3388 :
3389 0 : assert(cb_fn != NULL);
3390 :
3391 0 : accel_ch = bdev_nvme_get_accel_channel(group);
3392 0 : if (spdk_unlikely(accel_ch == NULL)) {
3393 0 : cb_fn(cb_arg, -ENOMEM);
3394 0 : return;
3395 : }
3396 :
3397 0 : rc = spdk_accel_submit_crc32cv(accel_ch, dst, iov, iov_cnt, seed, cb_fn, cb_arg);
3398 0 : if (rc) {
3399 : /* In these two cases, spdk_accel_submit_crc32cv() does not call the user's cb_fn. */
3400 0 : if (rc == -ENOMEM || rc == -EINVAL) {
3401 0 : cb_fn(cb_arg, rc);
3402 : }
3403 0 : SPDK_ERRLOG("Cannot complete the accelerated crc32c operation with iov=%p\n", iov);
3404 : }
3405 : }
3406 :
3407 : static void
3408 0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
3409 : {
3410 0 : spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
3411 0 : }
3412 :
3413 : static void
3414 0 : bdev_nvme_abort_sequence(void *seq)
3415 : {
3416 0 : spdk_accel_sequence_abort(seq);
3417 0 : }
3418 :
3419 : static void
3420 0 : bdev_nvme_reverse_sequence(void *seq)
3421 : {
3422 0 : spdk_accel_sequence_reverse(seq);
3423 0 : }
3424 :
3425 : static int
3426 0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
3427 : struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
3428 : spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
3429 : {
3430 : struct spdk_io_channel *ch;
3431 0 : struct nvme_poll_group *group = ctx;
3432 :
3433 0 : ch = bdev_nvme_get_accel_channel(group);
3434 0 : if (spdk_unlikely(ch == NULL)) {
3435 0 : return -ENOMEM;
3436 : }
3437 :
3438 0 : return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
3439 : domain, domain_ctx, seed, cb_fn, cb_arg);
3440 : }
3441 :
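     : /* Callbacks handed to spdk_nvme_poll_group_create() so that the NVMe driver can run
     :  * CRC32C computations and accel sequence operations through the poll group's accel
     :  * channel.
     :  */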
3442 : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
3443 : .table_size = sizeof(struct spdk_nvme_accel_fn_table),
3444 : .submit_accel_crc32c = bdev_nvme_submit_accel_crc32c,
3445 : .append_crc32c = bdev_nvme_append_crc32c,
3446 : .finish_sequence = bdev_nvme_finish_sequence,
3447 : .reverse_sequence = bdev_nvme_reverse_sequence,
3448 : .abort_sequence = bdev_nvme_abort_sequence,
3449 : };
3450 :
3451 : static int
3452 42 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
3453 : {
3454 42 : struct nvme_poll_group *group = ctx_buf;
3455 :
3456 42 : TAILQ_INIT(&group->qpair_list);
3457 :
3458 42 : group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
3459 42 : if (group->group == NULL) {
3460 0 : return -1;
3461 : }
3462 :
3463 42 : group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
3464 :
3465 42 : if (group->poller == NULL) {
3466 0 : spdk_nvme_poll_group_destroy(group->group);
3467 0 : return -1;
3468 : }
3469 :
3470 42 : return 0;
3471 : }
3472 :
3473 : static void
3474 42 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
3475 : {
3476 42 : struct nvme_poll_group *group = ctx_buf;
3477 :
3478 42 : assert(TAILQ_EMPTY(&group->qpair_list));
3479 :
3480 42 : if (group->accel_channel) {
3481 0 : spdk_put_io_channel(group->accel_channel);
3482 : }
3483 :
3484 42 : spdk_poller_unregister(&group->poller);
3485 42 : if (spdk_nvme_poll_group_destroy(group->group)) {
3486 0 : SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
3487 0 : assert(false);
3488 : }
3489 42 : }
3490 :
3491 : static struct spdk_io_channel *
3492 0 : bdev_nvme_get_io_channel(void *ctx)
3493 : {
3494 0 : struct nvme_bdev *nvme_bdev = ctx;
3495 :
3496 0 : return spdk_get_io_channel(nvme_bdev);
3497 : }
3498 :
3499 : static void *
3500 0 : bdev_nvme_get_module_ctx(void *ctx)
3501 : {
3502 0 : struct nvme_bdev *nvme_bdev = ctx;
3503 : struct nvme_ns *nvme_ns;
3504 :
3505 0 : if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
3506 0 : return NULL;
3507 : }
3508 :
3509 0 : nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
3510 0 : if (!nvme_ns) {
3511 0 : return NULL;
3512 : }
3513 :
3514 0 : return nvme_ns->ns;
3515 : }
3516 :
3517 : static const char *
3518 0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
3519 : {
3520 0 : switch (ana_state) {
3521 0 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
3522 0 : return "optimized";
3523 0 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
3524 0 : return "non_optimized";
3525 0 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3526 0 : return "inaccessible";
3527 0 : case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3528 0 : return "persistent_loss";
3529 0 : case SPDK_NVME_ANA_CHANGE_STATE:
3530 0 : return "change";
3531 0 : default:
3532 0 : return NULL;
3533 : }
3534 : }
3535 :
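 : /* Gather the memory domains of every controller backing this bdev. Returns the total number of domains found, which may exceed array_size. */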
3536 : static int
3537 8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
3538 : {
3539 8 : struct spdk_memory_domain **_domains = NULL;
3540 8 : struct nvme_bdev *nbdev = ctx;
3541 : struct nvme_ns *nvme_ns;
3542 8 : int i = 0, _array_size = array_size;
3543 8 : int rc = 0;
3544 :
3545 22 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
3546 14 : if (domains && array_size >= i) {
3547 11 : _domains = &domains[i];
3548 : } else {
3549 3 : _domains = NULL;
3550 : }
3551 14 : rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
3552 14 : if (rc > 0) {
3553 13 : i += rc;
3554 13 : if (_array_size >= rc) {
3555 9 : _array_size -= rc;
3556 : } else {
3557 4 : _array_size = 0;
3558 : }
3559 1 : } else if (rc < 0) {
3560 0 : return rc;
3561 : }
3562 : }
3563 :
3564 8 : return i;
3565 : }
3566 :
3567 : static const char *
3568 0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
3569 : {
3570 0 : if (nvme_ctrlr->destruct) {
3571 0 : return "deleting";
3572 0 : } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
3573 0 : return "failed";
3574 0 : } else if (nvme_ctrlr->resetting) {
3575 0 : return "resetting";
3576 0 : } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
3577 0 : return "reconnect_is_delayed";
3578 0 : } else if (nvme_ctrlr->disabled) {
3579 0 : return "disabled";
3580 : } else {
3581 0 : return "enabled";
3582 : }
3583 : }
3584 :
3585 : void
3586 0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
3587 0 : {
3588 : struct spdk_nvme_transport_id *trid;
3589 : const struct spdk_nvme_ctrlr_opts *opts;
3590 : const struct spdk_nvme_ctrlr_data *cdata;
3591 : struct nvme_path_id *path_id;
3592 :
3593 0 : spdk_json_write_object_begin(w);
3594 :
3595 0 : spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
3596 :
3597 : #ifdef SPDK_CONFIG_NVME_CUSE
3598 0 : size_t cuse_name_size = 128;
3599 0 : char cuse_name[cuse_name_size];
3600 :
3601 0 : int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
3602 0 : if (rc == 0) {
3603 0 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
3604 : }
3605 : #endif
3606 0 : trid = &nvme_ctrlr->active_path_id->trid;
3607 0 : spdk_json_write_named_object_begin(w, "trid");
3608 0 : nvme_bdev_dump_trid_json(trid, w);
3609 0 : spdk_json_write_object_end(w);
3610 :
3611 0 : path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
3612 0 : if (path_id != NULL) {
3613 0 : spdk_json_write_named_array_begin(w, "alternate_trids");
3614 : do {
3615 0 : trid = &path_id->trid;
3616 0 : spdk_json_write_object_begin(w);
3617 0 : nvme_bdev_dump_trid_json(trid, w);
3618 0 : spdk_json_write_object_end(w);
3619 :
3620 0 : path_id = TAILQ_NEXT(path_id, link);
3621 0 : } while (path_id != NULL);
3622 0 : spdk_json_write_array_end(w);
3623 : }
3624 :
3625 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
3626 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
3627 :
3628 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
3629 0 : spdk_json_write_named_object_begin(w, "host");
3630 0 : spdk_json_write_named_string(w, "nqn", opts->hostnqn);
3631 0 : spdk_json_write_named_string(w, "addr", opts->src_addr);
3632 0 : spdk_json_write_named_string(w, "svcid", opts->src_svcid);
3633 0 : spdk_json_write_object_end(w);
3634 :
3635 0 : spdk_json_write_object_end(w);
3636 0 : }
3637 :
3638 : static void
3639 0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
3640 : struct nvme_ns *nvme_ns)
3641 0 : {
3642 : struct spdk_nvme_ns *ns;
3643 : struct spdk_nvme_ctrlr *ctrlr;
3644 : const struct spdk_nvme_ctrlr_data *cdata;
3645 : const struct spdk_nvme_transport_id *trid;
3646 : union spdk_nvme_vs_register vs;
3647 : const struct spdk_nvme_ns_data *nsdata;
3648 0 : char buf[128];
3649 :
3650 0 : ns = nvme_ns->ns;
3651 0 : if (ns == NULL) {
3652 0 : return;
3653 : }
3654 :
3655 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3656 :
3657 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3658 0 : trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
3659 0 : vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
3660 :
3661 0 : spdk_json_write_object_begin(w);
3662 :
3663 0 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
3664 0 : spdk_json_write_named_string(w, "pci_address", trid->traddr);
3665 : }
3666 :
3667 0 : spdk_json_write_named_object_begin(w, "trid");
3668 :
3669 0 : nvme_bdev_dump_trid_json(trid, w);
3670 :
3671 0 : spdk_json_write_object_end(w);
3672 :
3673 : #ifdef SPDK_CONFIG_NVME_CUSE
3674 0 : size_t cuse_name_size = 128;
3675 0 : char cuse_name[cuse_name_size];
3676 :
3677 0 : int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
3678 : cuse_name, &cuse_name_size);
3679 0 : if (rc == 0) {
3680 0 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
3681 : }
3682 : #endif
3683 :
3684 0 : spdk_json_write_named_object_begin(w, "ctrlr_data");
3685 :
3686 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
3687 :
3688 0 : spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
3689 :
3690 0 : snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
3691 0 : spdk_str_trim(buf);
3692 0 : spdk_json_write_named_string(w, "model_number", buf);
3693 :
3694 0 : snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
3695 0 : spdk_str_trim(buf);
3696 0 : spdk_json_write_named_string(w, "serial_number", buf);
3697 :
3698 0 : snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
3699 0 : spdk_str_trim(buf);
3700 0 : spdk_json_write_named_string(w, "firmware_revision", buf);
3701 :
3702 0 : if (cdata->subnqn[0] != '\0') {
3703 0 : spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
3704 : }
3705 :
3706 0 : spdk_json_write_named_object_begin(w, "oacs");
3707 :
3708 0 : spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
3709 0 : spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
3710 0 : spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
3711 0 : spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
3712 :
3713 0 : spdk_json_write_object_end(w);
3714 :
3715 0 : spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
3716 0 : spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
3717 :
3718 0 : spdk_json_write_object_end(w);
3719 :
3720 0 : spdk_json_write_named_object_begin(w, "vs");
3721 :
3722 0 : spdk_json_write_name(w, "nvme_version");
3723 0 : if (vs.bits.ter) {
3724 0 : spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
3725 : } else {
3726 0 : spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
3727 : }
3728 :
3729 0 : spdk_json_write_object_end(w);
3730 :
3731 0 : nsdata = spdk_nvme_ns_get_data(ns);
3732 :
3733 0 : spdk_json_write_named_object_begin(w, "ns_data");
3734 :
3735 0 : spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
3736 :
3737 0 : if (cdata->cmic.ana_reporting) {
3738 0 : spdk_json_write_named_string(w, "ana_state",
3739 : _nvme_ana_state_str(nvme_ns->ana_state));
3740 : }
3741 :
3742 0 : spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
3743 :
3744 0 : spdk_json_write_object_end(w);
3745 :
3746 0 : if (cdata->oacs.security) {
3747 0 : spdk_json_write_named_object_begin(w, "security");
3748 :
3749 0 : spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
3750 :
3751 0 : spdk_json_write_object_end(w);
3752 : }
3753 :
3754 0 : spdk_json_write_object_end(w);
3755 : }
3756 :
3757 : static const char *
3758 0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
3759 : {
3760 0 : switch (nbdev->mp_policy) {
3761 0 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
3762 0 : return "active_passive";
3763 0 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
3764 0 : return "active_active";
3765 0 : default:
3766 0 : assert(false);
3767 : return "invalid";
3768 : }
3769 : }
3770 :
3771 : static const char *
3772 0 : nvme_bdev_get_mp_selector_str(struct nvme_bdev *nbdev)
3773 : {
3774 0 : switch (nbdev->mp_selector) {
3775 0 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
3776 0 : return "round_robin";
3777 0 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
3778 0 : return "queue_depth";
3779 0 : default:
3780 0 : assert(false);
3781 : return "invalid";
3782 : }
3783 : }
3784 :
3785 : static int
3786 0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
3787 : {
3788 0 : struct nvme_bdev *nvme_bdev = ctx;
3789 : struct nvme_ns *nvme_ns;
3790 :
3791 0 : pthread_mutex_lock(&nvme_bdev->mutex);
3792 0 : spdk_json_write_named_array_begin(w, "nvme");
3793 0 : TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
3794 0 : nvme_namespace_info_json(w, nvme_ns);
3795 : }
3796 0 : spdk_json_write_array_end(w);
3797 0 : spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
3798 0 : if (nvme_bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
3799 0 : spdk_json_write_named_string(w, "selector", nvme_bdev_get_mp_selector_str(nvme_bdev));
3800 0 : if (nvme_bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
3801 0 : spdk_json_write_named_uint32(w, "rr_min_io", nvme_bdev->rr_min_io);
3802 : }
3803 : }
3804 0 : pthread_mutex_unlock(&nvme_bdev->mutex);
3805 :
3806 0 : return 0;
3807 : }
3808 :
3809 : static void
3810 0 : bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
3811 : {
3812 : /* No config per bdev needed */
3813 0 : }
3814 :
3815 : static uint64_t
3816 0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
3817 : {
3818 0 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
3819 : struct nvme_io_path *io_path;
3820 : struct nvme_poll_group *group;
3821 0 : uint64_t spin_time = 0;
3822 :
3823 0 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
3824 0 : group = io_path->qpair->group;
3825 :
3826 0 : if (!group || !group->collect_spin_stat) {
3827 0 : continue;
3828 : }
3829 :
3830 0 : if (group->end_ticks != 0) {
3831 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
3832 0 : group->end_ticks = 0;
3833 : }
3834 :
3835 0 : spin_time += group->spin_ticks;
3836 0 : group->start_ticks = 0;
3837 0 : group->spin_ticks = 0;
3838 : }
3839 :
3840 0 : return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
3841 : }
3842 :
3843 : static void
3844 0 : bdev_nvme_reset_device_stat(void *ctx)
3845 : {
3846 0 : struct nvme_bdev *nbdev = ctx;
3847 :
3848 0 : if (nbdev->err_stat != NULL) {
3849 0 : memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
3850 : }
3851 0 : }
3852 :
3853 : /* JSON string should be lowercases and underscore delimited string. */
3854 : /* JSON keys should be lowercase, underscore-delimited strings. */
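 : /* For example, a status string of the form "ABC - DEF GHI" becomes "abc_def_ghi". */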
3855 0 : bdev_nvme_format_nvme_status(char *dst, const char *src)
3856 : {
3857 0 : char tmp[256];
3858 :
3859 0 : spdk_strcpy_replace(dst, 256, src, " - ", "_");
3860 0 : spdk_strcpy_replace(tmp, 256, dst, "-", "_");
3861 0 : spdk_strcpy_replace(dst, 256, tmp, " ", "_");
3862 0 : spdk_strlwr(dst);
3863 0 : }
3864 :
3865 : static void
3866 0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
3867 : {
3868 0 : struct nvme_bdev *nbdev = ctx;
3869 0 : struct spdk_nvme_status status = {};
3870 : uint16_t sct, sc;
3871 0 : char status_json[256];
3872 : const char *status_str;
3873 :
3874 0 : if (nbdev->err_stat == NULL) {
3875 0 : return;
3876 : }
3877 :
3878 0 : spdk_json_write_named_object_begin(w, "nvme_error");
3879 :
3880 0 : spdk_json_write_named_object_begin(w, "status_type");
3881 0 : for (sct = 0; sct < 8; sct++) {
3882 0 : if (nbdev->err_stat->status_type[sct] == 0) {
3883 0 : continue;
3884 : }
3885 0 : status.sct = sct;
3886 :
3887 0 : status_str = spdk_nvme_cpl_get_status_type_string(&status);
3888 0 : assert(status_str != NULL);
3889 0 : bdev_nvme_format_nvme_status(status_json, status_str);
3890 :
3891 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
3892 : }
3893 0 : spdk_json_write_object_end(w);
3894 :
3895 0 : spdk_json_write_named_object_begin(w, "status_code");
3896 0 : for (sct = 0; sct < 4; sct++) {
3897 0 : status.sct = sct;
3898 0 : for (sc = 0; sc < 256; sc++) {
3899 0 : if (nbdev->err_stat->status[sct][sc] == 0) {
3900 0 : continue;
3901 : }
3902 0 : status.sc = sc;
3903 :
3904 0 : status_str = spdk_nvme_cpl_get_status_string(&status);
3905 0 : assert(status_str != NULL);
3906 0 : bdev_nvme_format_nvme_status(status_json, status_str);
3907 :
3908 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
3909 : }
3910 : }
3911 0 : spdk_json_write_object_end(w);
3912 :
3913 0 : spdk_json_write_object_end(w);
3914 : }
3915 :
3916 : static bool
3917 0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
3918 : {
3919 0 : struct nvme_bdev *nbdev = ctx;
3920 : struct spdk_nvme_ctrlr *ctrlr;
3921 :
3922 0 : if (!g_opts.allow_accel_sequence) {
3923 0 : return false;
3924 : }
3925 :
3926 0 : switch (type) {
3927 0 : case SPDK_BDEV_IO_TYPE_WRITE:
3928 : case SPDK_BDEV_IO_TYPE_READ:
3929 0 : break;
3930 0 : default:
3931 0 : return false;
3932 : }
3933 :
3934 0 : ctrlr = bdev_nvme_get_ctrlr(&nbdev->disk);
3935 0 : assert(ctrlr != NULL);
3936 :
3937 0 : return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
3938 : }
3939 :
3940 : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
3941 : .destruct = bdev_nvme_destruct,
3942 : .submit_request = bdev_nvme_submit_request,
3943 : .io_type_supported = bdev_nvme_io_type_supported,
3944 : .get_io_channel = bdev_nvme_get_io_channel,
3945 : .dump_info_json = bdev_nvme_dump_info_json,
3946 : .write_config_json = bdev_nvme_write_config_json,
3947 : .get_spin_time = bdev_nvme_get_spin_time,
3948 : .get_module_ctx = bdev_nvme_get_module_ctx,
3949 : .get_memory_domains = bdev_nvme_get_memory_domains,
3950 : .accel_sequence_supported = bdev_nvme_accel_sequence_supported,
3951 : .reset_device_stat = bdev_nvme_reset_device_stat,
3952 : .dump_device_stat_json = bdev_nvme_dump_device_stat_json,
3953 : };
3954 :
3955 : typedef int (*bdev_nvme_parse_ana_log_page_cb)(
3956 : const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);
3957 :
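 : /* Walk the cached ANA log page, copying each group descriptor into the pre-allocated buffer and invoking cb_fn on it until cb_fn returns non-zero. */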
3958 : static int
3959 41 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
3960 : bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
3961 : {
3962 : struct spdk_nvme_ana_group_descriptor *copied_desc;
3963 : uint8_t *orig_desc;
3964 : uint32_t i, desc_size, copy_len;
3965 41 : int rc = 0;
3966 :
3967 41 : if (nvme_ctrlr->ana_log_page == NULL) {
3968 0 : return -EINVAL;
3969 : }
3970 :
3971 41 : copied_desc = nvme_ctrlr->copied_ana_desc;
3972 :
3973 41 : orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
3974 41 : copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
3975 :
3976 71 : for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
3977 66 : memcpy(copied_desc, orig_desc, copy_len);
3978 :
3979 66 : rc = cb_fn(copied_desc, cb_arg);
3980 66 : if (rc != 0) {
3981 36 : break;
3982 : }
3983 :
3984 30 : desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
3985 30 : copied_desc->num_of_nsid * sizeof(uint32_t);
3986 30 : orig_desc += desc_size;
3987 30 : copy_len -= desc_size;
3988 : }
3989 :
3990 41 : return rc;
3991 : }
3992 :
3993 : static int
3994 5 : nvme_ns_ana_transition_timedout(void *ctx)
3995 : {
3996 5 : struct nvme_ns *nvme_ns = ctx;
3997 :
3998 5 : spdk_poller_unregister(&nvme_ns->anatt_timer);
3999 5 : nvme_ns->ana_transition_timedout = true;
4000 :
4001 5 : return SPDK_POLLER_BUSY;
4002 : }
4003 :
4004 : static void
4005 45 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
4006 : const struct spdk_nvme_ana_group_descriptor *desc)
4007 : {
4008 : const struct spdk_nvme_ctrlr_data *cdata;
4009 :
4010 45 : nvme_ns->ana_group_id = desc->ana_group_id;
4011 45 : nvme_ns->ana_state = desc->ana_state;
4012 45 : nvme_ns->ana_state_updating = false;
4013 :
4014 45 : switch (nvme_ns->ana_state) {
4015 38 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
4016 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
4017 38 : nvme_ns->ana_transition_timedout = false;
4018 38 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4019 38 : break;
4020 :
4021 6 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
4022 : case SPDK_NVME_ANA_CHANGE_STATE:
4023 6 : if (nvme_ns->anatt_timer != NULL) {
4024 1 : break;
4025 : }
4026 :
4027 5 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
4028 5 : nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
4029 : nvme_ns,
4030 : cdata->anatt * SPDK_SEC_TO_USEC);
4031 5 : break;
4032 1 : default:
4033 1 : break;
4034 : }
4035 45 : }
4036 :
4037 : static int
4038 59 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
4039 : {
4040 59 : struct nvme_ns *nvme_ns = cb_arg;
4041 : uint32_t i;
4042 :
4043 59 : assert(nvme_ns->ns != NULL);
4044 :
4045 81 : for (i = 0; i < desc->num_of_nsid; i++) {
4046 58 : if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
4047 22 : continue;
4048 : }
4049 :
4050 36 : _nvme_ns_set_ana_state(nvme_ns, desc);
4051 36 : return 1;
4052 : }
4053 :
4054 23 : return 0;
4055 : }
4056 :
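 : /* Derive a deterministic, SHA-1-based UUID for a namespace from the controller serial number and NSID, seeded by a fixed namespace UUID. */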
4057 : static int
4058 5 : nvme_generate_uuid(const char *sn, uint32_t nsid, struct spdk_uuid *uuid)
4059 : {
4060 5 : int rc = 0;
4061 5 : struct spdk_uuid new_uuid, namespace_uuid;
4062 5 : char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
4063 : 	/* This namespace UUID was generated using the uuid_generate() utility. */
4064 5 : const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
4065 : int size;
4066 :
4067 5 : assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
4068 :
4069 5 : spdk_uuid_set_null(&new_uuid);
4070 5 : spdk_uuid_set_null(&namespace_uuid);
4071 :
4072 5 : size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
4073 5 : if (size <= 0 || (unsigned long)size >= sizeof(merged_str)) {
4074 0 : return -EINVAL;
4075 : }
4076 :
4077 5 : spdk_uuid_parse(&namespace_uuid, namespace_str);
4078 :
4079 5 : rc = spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
4080 5 : if (rc == 0) {
4081 5 : memcpy(uuid, &new_uuid, sizeof(struct spdk_uuid));
4082 : }
4083 :
4084 5 : return rc;
4085 : }
4086 :
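 : /* Fill in the spdk_bdev structure (product name, UUID, block geometry, PI settings, and transfer limits) from the NVMe controller and namespace data. */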
4087 : static int
4088 37 : nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
4089 : struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
4090 : uint32_t prchk_flags, void *ctx)
4091 : {
4092 : const struct spdk_uuid *uuid;
4093 : const uint8_t *nguid;
4094 : const struct spdk_nvme_ctrlr_data *cdata;
4095 : const struct spdk_nvme_ns_data *nsdata;
4096 : const struct spdk_nvme_ctrlr_opts *opts;
4097 : enum spdk_nvme_csi csi;
4098 : uint32_t atomic_bs, phys_bs, bs;
4099 37 : char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
4100 : int rc;
4101 :
4102 37 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4103 37 : csi = spdk_nvme_ns_get_csi(ns);
4104 37 : opts = spdk_nvme_ctrlr_get_opts(ctrlr);
4105 :
4106 37 : switch (csi) {
4107 37 : case SPDK_NVME_CSI_NVM:
4108 37 : disk->product_name = "NVMe disk";
4109 37 : break;
4110 0 : case SPDK_NVME_CSI_ZNS:
4111 0 : disk->product_name = "NVMe ZNS disk";
4112 0 : disk->zoned = true;
4113 0 : disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
4114 0 : disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
4115 0 : spdk_nvme_ns_get_extended_sector_size(ns);
4116 0 : disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
4117 0 : disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
4118 0 : break;
4119 0 : default:
4120 0 : SPDK_ERRLOG("unsupported CSI: %u\n", csi);
4121 0 : return -ENOTSUP;
4122 : }
4123 :
4124 37 : nguid = spdk_nvme_ns_get_nguid(ns);
4125 37 : if (!nguid) {
4126 37 : uuid = spdk_nvme_ns_get_uuid(ns);
4127 37 : if (uuid) {
4128 12 : disk->uuid = *uuid;
4129 25 : } else if (g_opts.generate_uuids) {
4130 0 : spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
4131 0 : rc = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns), &disk->uuid);
4132 0 : if (rc < 0) {
4133 0 : SPDK_ERRLOG("UUID generation failed (%s)\n", spdk_strerror(-rc));
4134 0 : return rc;
4135 : }
4136 : }
4137 : } else {
4138 0 : memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
4139 : }
4140 :
4141 37 : disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
4142 37 : if (!disk->name) {
4143 0 : return -ENOMEM;
4144 : }
4145 :
4146 37 : disk->write_cache = 0;
4147 37 : if (cdata->vwc.present) {
4148 : /* Enable if the Volatile Write Cache exists */
4149 0 : disk->write_cache = 1;
4150 : }
4151 37 : if (cdata->oncs.write_zeroes) {
4152 0 : disk->max_write_zeroes = UINT16_MAX + 1;
4153 : }
4154 37 : disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
4155 37 : disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
4156 37 : disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
4157 37 : disk->ctratt.raw = cdata->ctratt.raw;
4158 : 	/* The NVMe driver splits one request into multiple requests
4159 : 	 * based on MDTS and the stripe boundary. The bdev layer uses
4160 : 	 * max_segment_size and max_num_segments to split one big I/O
4161 : 	 * into multiple requests first, so the resulting small requests
4162 : 	 * cannot exhaust the NVMe driver's internal request data structures.
4163 : 	 */
4164 37 : if (opts && opts->io_queue_requests) {
4165 0 : disk->max_num_segments = opts->io_queue_requests / 2;
4166 : }
4167 37 : if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
4168 : 		/* The NVMe driver will try to split I/Os that have too many
4169 : 		 * SGEs, but it doesn't work if the last SGE doesn't end on
4170 : 		 * an aggregate total that is block aligned. The bdev layer has
4171 : 		 * a more robust splitting framework, so use that instead for
4172 : 		 * this case. (See issue #3269.)
4173 : 		 */
4174 0 : uint16_t max_sges = spdk_nvme_ctrlr_get_max_sges(ctrlr);
4175 :
4176 0 : if (disk->max_num_segments == 0) {
4177 0 : disk->max_num_segments = max_sges;
4178 : } else {
4179 0 : disk->max_num_segments = spdk_min(disk->max_num_segments, max_sges);
4180 : }
4181 : }
4182 37 : disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
4183 :
4184 37 : nsdata = spdk_nvme_ns_get_data(ns);
4185 37 : bs = spdk_nvme_ns_get_sector_size(ns);
4186 37 : atomic_bs = bs;
4187 37 : phys_bs = bs;
4188 37 : if (nsdata->nabo == 0) {
4189 37 : if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
4190 0 : atomic_bs = bs * (1 + nsdata->nawupf);
4191 : } else {
4192 37 : atomic_bs = bs * (1 + cdata->awupf);
4193 : }
4194 : }
4195 37 : if (nsdata->nsfeat.optperf) {
4196 0 : phys_bs = bs * (1 + nsdata->npwg);
4197 : }
4198 37 : disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
4199 :
4200 37 : disk->md_len = spdk_nvme_ns_get_md_size(ns);
4201 37 : if (disk->md_len != 0) {
4202 0 : disk->md_interleave = nsdata->flbas.extended;
4203 0 : disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
4204 0 : if (disk->dif_type != SPDK_DIF_DISABLE) {
4205 0 : disk->dif_is_head_of_md = nsdata->dps.md_start;
4206 0 : disk->dif_check_flags = prchk_flags;
4207 0 : disk->dif_pi_format = (enum spdk_dif_pi_format)spdk_nvme_ns_get_pi_format(ns);
4208 : }
4209 : }
4210 :
4211 37 : if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
4212 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
4213 37 : disk->acwu = 0;
4214 0 : } else if (nsdata->nsfeat.ns_atomic_write_unit) {
4215 0 : disk->acwu = nsdata->nacwu + 1; /* 0-based */
4216 : } else {
4217 0 : disk->acwu = cdata->acwu + 1; /* 0-based */
4218 : }
4219 :
4220 37 : if (cdata->oncs.copy) {
4221 : 		/* For now, the bdev interface allows only a single-segment copy */
4222 0 : disk->max_copy = nsdata->mssrl;
4223 : }
4224 :
4225 37 : disk->ctxt = ctx;
4226 37 : disk->fn_table = &nvmelib_fn_table;
4227 37 : disk->module = &nvme_if;
4228 :
4229 37 : return 0;
4230 : }
4231 :
4232 : static struct nvme_bdev *
4233 37 : nvme_bdev_alloc(void)
4234 : {
4235 : struct nvme_bdev *bdev;
4236 : int rc;
4237 :
4238 37 : bdev = calloc(1, sizeof(*bdev));
4239 37 : if (!bdev) {
4240 0 : SPDK_ERRLOG("bdev calloc() failed\n");
4241 0 : return NULL;
4242 : }
4243 :
4244 37 : if (g_opts.nvme_error_stat) {
4245 0 : bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
4246 0 : if (!bdev->err_stat) {
4247 0 : SPDK_ERRLOG("err_stat calloc() failed\n");
4248 0 : free(bdev);
4249 0 : return NULL;
4250 : }
4251 : }
4252 :
4253 37 : rc = pthread_mutex_init(&bdev->mutex, NULL);
4254 37 : if (rc != 0) {
4255 0 : free(bdev->err_stat);
4256 0 : free(bdev);
4257 0 : return NULL;
4258 : }
4259 :
4260 37 : bdev->ref = 1;
4261 37 : bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
4262 37 : bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
4263 37 : bdev->rr_min_io = UINT32_MAX;
4264 37 : TAILQ_INIT(&bdev->nvme_ns_list);
4265 :
4266 37 : return bdev;
4267 : }
4268 :
4269 : static int
4270 37 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4271 : {
4272 : struct nvme_bdev *bdev;
4273 37 : struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
4274 : int rc;
4275 :
4276 37 : bdev = nvme_bdev_alloc();
4277 37 : if (bdev == NULL) {
4278 0 : SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
4279 0 : return -ENOMEM;
4280 : }
4281 :
4282 37 : bdev->opal = nvme_ctrlr->opal_dev != NULL;
4283 :
4284 37 : rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
4285 : nvme_ns->ns, nvme_ctrlr->opts.prchk_flags, bdev);
4286 37 : if (rc != 0) {
4287 0 : SPDK_ERRLOG("Failed to create NVMe disk\n");
4288 0 : nvme_bdev_free(bdev);
4289 0 : return rc;
4290 : }
4291 :
4292 37 : spdk_io_device_register(bdev,
4293 : bdev_nvme_create_bdev_channel_cb,
4294 : bdev_nvme_destroy_bdev_channel_cb,
4295 : sizeof(struct nvme_bdev_channel),
4296 37 : bdev->disk.name);
4297 :
4298 37 : nvme_ns->bdev = bdev;
4299 37 : bdev->nsid = nvme_ns->id;
4300 37 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4301 :
4302 37 : bdev->nbdev_ctrlr = nbdev_ctrlr;
4303 37 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);
4304 :
4305 37 : rc = spdk_bdev_register(&bdev->disk);
4306 37 : if (rc != 0) {
4307 1 : SPDK_ERRLOG("spdk_bdev_register() failed\n");
4308 1 : spdk_io_device_unregister(bdev, NULL);
4309 1 : nvme_ns->bdev = NULL;
4310 1 : TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
4311 1 : nvme_bdev_free(bdev);
4312 1 : return rc;
4313 : }
4314 :
4315 36 : return 0;
4316 : }
4317 :
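 : /* Two namespaces are treated as identical only if their NGUID, EUI64, UUID, and command set identifier all match. */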
4318 : static bool
4319 23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
4320 : {
4321 : const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
4322 : const struct spdk_uuid *uuid1, *uuid2;
4323 :
4324 23 : nsdata1 = spdk_nvme_ns_get_data(ns1);
4325 23 : nsdata2 = spdk_nvme_ns_get_data(ns2);
4326 23 : uuid1 = spdk_nvme_ns_get_uuid(ns1);
4327 23 : uuid2 = spdk_nvme_ns_get_uuid(ns2);
4328 :
4329 45 : return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
4330 22 : nsdata1->eui64 == nsdata2->eui64 &&
4331 21 : ((uuid1 == NULL && uuid2 == NULL) ||
4332 59 : (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
4333 18 : spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
4334 : }
4335 :
4336 : static bool
4337 0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
4338 : struct spdk_nvme_ctrlr_opts *opts)
4339 : {
4340 : struct nvme_probe_skip_entry *entry;
4341 :
4342 0 : TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
4343 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
4344 0 : return false;
4345 : }
4346 : }
4347 :
4348 0 : opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
4349 0 : opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
4350 0 : opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
4351 0 : opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
4352 0 : opts->disable_read_ana_log_page = true;
4353 :
4354 0 : SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
4355 :
4356 0 : return true;
4357 : }
4358 :
4359 : static void
4360 0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
4361 : {
4362 0 : struct nvme_ctrlr *nvme_ctrlr = ctx;
4363 :
4364 0 : if (spdk_nvme_cpl_is_error(cpl)) {
4365 0 : SPDK_WARNLOG("Abort failed. Resetting controller. sc is %u, sct is %u.\n", cpl->status.sc,
4366 : cpl->status.sct);
4367 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4368 0 : } else if (cpl->cdw0 & 0x1) {
4369 0 : SPDK_WARNLOG("Specified command could not be aborted.\n");
4370 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4371 : }
4372 0 : }
4373 :
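 : /* Command timeout handler: reset the controller on a fatal status, otherwise abort the timed-out command or reset according to g_opts.action_on_timeout. */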
4374 : static void
4375 0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
4376 : struct spdk_nvme_qpair *qpair, uint16_t cid)
4377 : {
4378 0 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
4379 : union spdk_nvme_csts_register csts;
4380 : int rc;
4381 :
4382 0 : assert(nvme_ctrlr->ctrlr == ctrlr);
4383 :
4384 0 : SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
4385 :
4386 : /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
4387 : * queue. (Note: qpair == NULL when there's an admin cmd timeout.) Otherwise we
4388 : * would submit another fabrics cmd on the admin queue to read CSTS and check for its
4389 : * completion recursively.
4390 : */
4391 0 : if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
4392 0 : csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
4393 0 : if (csts.bits.cfs) {
4394 0 : SPDK_ERRLOG("Controller Fatal Status, reset required\n");
4395 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4396 0 : return;
4397 : }
4398 : }
4399 :
4400 0 : switch (g_opts.action_on_timeout) {
4401 0 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
4402 0 : if (qpair) {
4403 : /* Don't send abort to ctrlr when ctrlr is not available. */
4404 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4405 0 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
4406 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4407 0 : SPDK_NOTICELOG("Quit abort. Ctrlr is not available.\n");
4408 0 : return;
4409 : }
4410 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4411 :
4412 0 : rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
4413 : nvme_abort_cpl, nvme_ctrlr);
4414 0 : if (rc == 0) {
4415 0 : return;
4416 : }
4417 :
4418 0 : SPDK_ERRLOG("Unable to send abort. Resetting, rc is %d.\n", rc);
4419 : }
4420 :
4421 : /* FALLTHROUGH */
4422 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
4423 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4424 0 : break;
4425 0 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
4426 0 : SPDK_DEBUGLOG(bdev_nvme, "No action for nvme controller timeout.\n");
4427 0 : break;
4428 0 : default:
4429 0 : SPDK_ERRLOG("An invalid timeout action value is found.\n");
4430 0 : break;
4431 : }
4432 : }
4433 :
4434 : static struct nvme_ns *
4435 50 : nvme_ns_alloc(void)
4436 : {
4437 : struct nvme_ns *nvme_ns;
4438 :
4439 50 : nvme_ns = calloc(1, sizeof(struct nvme_ns));
4440 50 : if (nvme_ns == NULL) {
4441 0 : return NULL;
4442 : }
4443 :
4444 50 : if (g_opts.io_path_stat) {
4445 0 : nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
4446 0 : if (nvme_ns->stat == NULL) {
4447 0 : free(nvme_ns);
4448 0 : return NULL;
4449 : }
4450 0 : spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
4451 : }
4452 :
4453 50 : return nvme_ns;
4454 : }
4455 :
4456 : static void
4457 50 : nvme_ns_free(struct nvme_ns *nvme_ns)
4458 : {
4459 50 : free(nvme_ns->stat);
4460 50 : free(nvme_ns);
4461 50 : }
4462 :
4463 : static void
4464 50 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
4465 : {
4466 50 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4467 50 : struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
4468 :
4469 50 : if (rc == 0) {
4470 48 : nvme_ns->probe_ctx = NULL;
4471 48 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4472 48 : nvme_ctrlr->ref++;
4473 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4474 : } else {
4475 2 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4476 2 : nvme_ns_free(nvme_ns);
4477 : }
4478 :
4479 50 : if (ctx) {
4480 49 : ctx->populates_in_progress--;
4481 49 : if (ctx->populates_in_progress == 0) {
4482 12 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
4483 : }
4484 : }
4485 50 : }
4486 :
4487 : static void
4488 2 : bdev_nvme_add_io_path(struct spdk_io_channel_iter *i)
4489 : {
4490 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4491 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4492 2 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4493 : int rc;
4494 :
4495 2 : rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
4496 2 : if (rc != 0) {
4497 0 : SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
4498 : }
4499 :
4500 2 : spdk_for_each_channel_continue(i, rc);
4501 2 : }
4502 :
4503 : static void
4504 2 : bdev_nvme_delete_io_path(struct spdk_io_channel_iter *i)
4505 : {
4506 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4507 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4508 2 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4509 : struct nvme_io_path *io_path;
4510 :
4511 2 : io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
4512 2 : if (io_path != NULL) {
4513 2 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
4514 : }
4515 :
4516 2 : spdk_for_each_channel_continue(i, 0);
4517 2 : }
4518 :
4519 : static void
4520 0 : bdev_nvme_add_io_path_failed(struct spdk_io_channel_iter *i, int status)
4521 : {
4522 0 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4523 :
4524 0 : nvme_ctrlr_populate_namespace_done(nvme_ns, -1);
4525 0 : }
4526 :
4527 : static void
4528 12 : bdev_nvme_add_io_path_done(struct spdk_io_channel_iter *i, int status)
4529 : {
4530 12 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4531 12 : struct nvme_bdev *bdev = spdk_io_channel_iter_get_io_device(i);
4532 :
4533 12 : if (status == 0) {
4534 12 : nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
4535 : } else {
4536 : /* Delete the added io_paths and fail populating the namespace. */
4537 0 : spdk_for_each_channel(bdev,
4538 : bdev_nvme_delete_io_path,
4539 : nvme_ns,
4540 : bdev_nvme_add_io_path_failed);
4541 : }
4542 12 : }
4543 :
4544 : static int
4545 13 : nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
4546 : {
4547 : struct nvme_ns *tmp_ns;
4548 : const struct spdk_nvme_ns_data *nsdata;
4549 :
4550 13 : nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
4551 13 : if (!nsdata->nmic.can_share) {
4552 0 : SPDK_ERRLOG("Namespace cannot be shared.\n");
4553 0 : return -EINVAL;
4554 : }
4555 :
4556 13 : pthread_mutex_lock(&bdev->mutex);
4557 :
4558 13 : tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
4559 13 : assert(tmp_ns != NULL);
4560 :
4561 13 : if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
4562 1 : pthread_mutex_unlock(&bdev->mutex);
4563 1 : SPDK_ERRLOG("Namespaces are not identical.\n");
4564 1 : return -EINVAL;
4565 : }
4566 :
4567 12 : bdev->ref++;
4568 12 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4569 12 : nvme_ns->bdev = bdev;
4570 :
4571 12 : pthread_mutex_unlock(&bdev->mutex);
4572 :
4573 : /* Add nvme_io_path to nvme_bdev_channels dynamically. */
4574 12 : spdk_for_each_channel(bdev,
4575 : bdev_nvme_add_io_path,
4576 : nvme_ns,
4577 : bdev_nvme_add_io_path_done);
4578 :
4579 12 : return 0;
4580 : }
4581 :
4582 : static void
4583 50 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4584 : {
4585 : struct spdk_nvme_ns *ns;
4586 : struct nvme_bdev *bdev;
4587 50 : int rc = 0;
4588 :
4589 50 : ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
4590 50 : if (!ns) {
4591 0 : SPDK_DEBUGLOG(bdev_nvme, "Invalid NS %d\n", nvme_ns->id);
4592 0 : rc = -EINVAL;
4593 0 : goto done;
4594 : }
4595 :
4596 50 : nvme_ns->ns = ns;
4597 50 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
4598 :
4599 50 : if (nvme_ctrlr->ana_log_page != NULL) {
4600 37 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
4601 : }
4602 :
4603 50 : bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
4604 50 : if (bdev == NULL) {
4605 37 : rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
4606 : } else {
4607 13 : rc = nvme_bdev_add_ns(bdev, nvme_ns);
4608 13 : if (rc == 0) {
4609 12 : return;
4610 : }
4611 : }
4612 1 : done:
4613 38 : nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
4614 : }
4615 :
4616 : static void
4617 48 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
4618 : {
4619 48 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4620 :
4621 48 : assert(nvme_ctrlr != NULL);
4622 :
4623 48 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4624 :
4625 48 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4626 :
4627 48 : if (nvme_ns->bdev != NULL) {
4628 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4629 0 : return;
4630 : }
4631 :
4632 48 : nvme_ns_free(nvme_ns);
4633 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4634 :
4635 48 : nvme_ctrlr_release(nvme_ctrlr);
4636 : }
4637 :
4638 : static void
4639 11 : bdev_nvme_delete_io_path_done(struct spdk_io_channel_iter *i, int status)
4640 : {
4641 11 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4642 :
4643 11 : nvme_ctrlr_depopulate_namespace_done(nvme_ns);
4644 11 : }
4645 :
4646 : static void
4647 48 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4648 : {
4649 : struct nvme_bdev *bdev;
4650 :
4651 48 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4652 :
4653 48 : bdev = nvme_ns->bdev;
4654 48 : if (bdev != NULL) {
4655 44 : pthread_mutex_lock(&bdev->mutex);
4656 :
4657 44 : assert(bdev->ref > 0);
4658 44 : bdev->ref--;
4659 44 : if (bdev->ref == 0) {
4660 33 : pthread_mutex_unlock(&bdev->mutex);
4661 :
4662 33 : spdk_bdev_unregister(&bdev->disk, NULL, NULL);
4663 : } else {
4664 : /* spdk_bdev_unregister() is not called until the last nvme_ns is
4665 : * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
4666 : * and clear nvme_ns->bdev here.
4667 : */
4668 11 : TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
4669 11 : nvme_ns->bdev = NULL;
4670 :
4671 11 : pthread_mutex_unlock(&bdev->mutex);
4672 :
4673 : /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
4674 : * we call depopulate_namespace_done() to avoid use-after-free.
4675 : */
4676 11 : spdk_for_each_channel(bdev,
4677 : bdev_nvme_delete_io_path,
4678 : nvme_ns,
4679 : bdev_nvme_delete_io_path_done);
4680 11 : return;
4681 : }
4682 : }
4683 :
4684 37 : nvme_ctrlr_depopulate_namespace_done(nvme_ns);
4685 : }
4686 :
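 : /* Reconcile the controller's active namespace list with the cached nvme_ns objects: resize or depopulate existing ones and create nvme_ns (and bdevs) for newly active NSIDs. */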
4687 : static void
4688 61 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
4689 : struct nvme_async_probe_ctx *ctx)
4690 : {
4691 61 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
4692 : struct nvme_ns *nvme_ns, *next;
4693 : struct spdk_nvme_ns *ns;
4694 : struct nvme_bdev *bdev;
4695 : uint32_t nsid;
4696 : int rc;
4697 : uint64_t num_sectors;
4698 :
4699 61 : if (ctx) {
4700 : /* Initialize this count to 1 to handle the populate functions
4701 : * calling nvme_ctrlr_populate_namespace_done() immediately.
4702 : */
4703 45 : ctx->populates_in_progress = 1;
4704 : }
4705 :
4706 : /* First loop over our existing namespaces and see if they have been
4707 : * removed. */
4708 61 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
4709 65 : while (nvme_ns != NULL) {
4710 4 : next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
4711 :
4712 4 : if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
4713 : /* NS is still there or added again. Its attributes may have changed. */
4714 3 : ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
4715 3 : if (nvme_ns->ns != ns) {
4716 1 : assert(nvme_ns->ns == NULL);
4717 1 : nvme_ns->ns = ns;
4718 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was added\n", nvme_ns->id);
4719 : }
4720 :
4721 3 : num_sectors = spdk_nvme_ns_get_num_sectors(ns);
4722 3 : bdev = nvme_ns->bdev;
4723 3 : assert(bdev != NULL);
4724 3 : if (bdev->disk.blockcnt != num_sectors) {
4725 1 : SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
4726 : nvme_ns->id,
4727 : bdev->disk.name,
4728 : bdev->disk.blockcnt,
4729 : num_sectors);
4730 1 : rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
4731 1 : if (rc != 0) {
4732 0 : SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
4733 : bdev->disk.name, rc);
4734 : }
4735 : }
4736 : } else {
4737 : /* Namespace was removed */
4738 1 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
4739 : }
4740 :
4741 4 : nvme_ns = next;
4742 : }
4743 :
4744 : /* Loop through all of the namespaces at the nvme level and see if any of them are new */
4745 61 : nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
4746 114 : while (nsid != 0) {
4747 53 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
4748 :
4749 53 : if (nvme_ns == NULL) {
4750 : /* Found a new one */
4751 50 : nvme_ns = nvme_ns_alloc();
4752 50 : if (nvme_ns == NULL) {
4753 0 : SPDK_ERRLOG("Failed to allocate namespace\n");
4754 : /* This just fails to attach the namespace. It may work on a future attempt. */
4755 0 : continue;
4756 : }
4757 :
4758 50 : nvme_ns->id = nsid;
4759 50 : nvme_ns->ctrlr = nvme_ctrlr;
4760 :
4761 50 : nvme_ns->bdev = NULL;
4762 :
4763 50 : if (ctx) {
4764 49 : ctx->populates_in_progress++;
4765 : }
4766 50 : nvme_ns->probe_ctx = ctx;
4767 :
4768 50 : RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4769 :
4770 50 : nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
4771 : }
4772 :
4773 53 : nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
4774 : }
4775 :
4776 61 : if (ctx) {
4777 : /* Decrement this count now that the loop is over to account
4778 : * for the one we started with. If the count is then 0, we
4779 : * know any populate_namespace functions completed immediately,
4780 : * so we'll kick the callback here.
4781 : */
4782 45 : ctx->populates_in_progress--;
4783 45 : if (ctx->populates_in_progress == 0) {
4784 33 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
4785 : }
4786 : }
4787 :
4788 61 : }
4789 :
4790 : static void
4791 60 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
4792 : {
4793 : struct nvme_ns *nvme_ns, *tmp;
4794 :
4795 107 : RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
4796 47 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
4797 : }
4798 60 : }
4799 :
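 : /* Compute the buffer size needed for the full ANA log page: the header plus one group descriptor per ANA group plus one NSID entry per active namespace. */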
4800 : static uint32_t
4801 36 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
4802 : {
4803 36 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
4804 : const struct spdk_nvme_ctrlr_data *cdata;
4805 36 : uint32_t nsid, ns_count = 0;
4806 :
4807 36 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4808 :
4809 36 : for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
4810 80 : nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
4811 44 : ns_count++;
4812 : }
4813 :
4814 36 : return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
4815 36 : sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
4816 : sizeof(uint32_t);
4817 : }
4818 :
4819 : static int
4820 7 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
4821 : void *cb_arg)
4822 : {
4823 7 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
4824 : struct nvme_ns *nvme_ns;
4825 : uint32_t i, nsid;
4826 :
4827 13 : for (i = 0; i < desc->num_of_nsid; i++) {
4828 6 : nsid = desc->nsid[i];
4829 6 : if (nsid == 0) {
4830 0 : continue;
4831 : }
4832 :
4833 6 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
4834 :
4835 6 : if (nvme_ns == NULL) {
4836 : /* Target told us that an inactive namespace had an ANA change */
4837 1 : continue;
4838 : }
4839 :
4840 5 : _nvme_ns_set_ana_state(nvme_ns, desc);
4841 : }
4842 :
4843 7 : return 0;
4844 : }
4845 :
4846 : static void
4847 0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
4848 : {
4849 : struct nvme_ns *nvme_ns;
4850 :
4851 0 : spdk_free(nvme_ctrlr->ana_log_page);
4852 0 : nvme_ctrlr->ana_log_page = NULL;
4853 :
4854 0 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
4855 0 : nvme_ns != NULL;
4856 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
4857 0 : nvme_ns->ana_state_updating = false;
4858 0 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
4859 : }
4860 0 : }
4861 :
4862 : static void
4863 3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
4864 : {
4865 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
4866 :
4867 3 : if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
4868 3 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
4869 : nvme_ctrlr);
4870 : } else {
4871 0 : bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
4872 : }
4873 :
4874 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4875 :
4876 3 : assert(nvme_ctrlr->ana_log_page_updating == true);
4877 3 : nvme_ctrlr->ana_log_page_updating = false;
4878 :
4879 3 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
4880 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4881 :
4882 0 : nvme_ctrlr_unregister(nvme_ctrlr);
4883 : } else {
4884 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4885 :
4886 3 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
4887 : }
4888 3 : }
4889 :
4890 : static int
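 : /* Issue an asynchronous Get Log Page command for the ANA log; the resulting ANA states are applied in nvme_ctrlr_read_ana_log_page_done(). */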
4891 6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
4892 : {
4893 : uint32_t ana_log_page_size;
4894 : int rc;
4895 :
4896 6 : if (nvme_ctrlr->ana_log_page == NULL) {
4897 0 : return -EINVAL;
4898 : }
4899 :
4900 6 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
4901 :
4902 6 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
4903 0 : SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
4904 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
4905 0 : return -EINVAL;
4906 : }
4907 :
4908 6 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4909 6 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
4910 : nvme_ctrlr->ana_log_page_updating) {
4911 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4912 3 : return -EBUSY;
4913 : }
4914 :
4915 3 : nvme_ctrlr->ana_log_page_updating = true;
4916 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4917 :
4918 3 : rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
4919 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
4920 : SPDK_NVME_GLOBAL_NS_TAG,
4921 3 : nvme_ctrlr->ana_log_page,
4922 : ana_log_page_size, 0,
4923 : nvme_ctrlr_read_ana_log_page_done,
4924 : nvme_ctrlr);
4925 3 : if (rc != 0) {
4926 0 : nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
4927 : }
4928 :
4929 3 : return rc;
4930 : }
4931 :
4932 : static void
4933 0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
4934 : {
4935 0 : }
4936 :
4937 : struct bdev_nvme_set_preferred_path_ctx {
4938 : struct spdk_bdev_desc *desc;
4939 : struct nvme_ns *nvme_ns;
4940 : bdev_nvme_set_preferred_path_cb cb_fn;
4941 : void *cb_arg;
4942 : };
4943 :
4944 : static void
4945 3 : bdev_nvme_set_preferred_path_done(struct spdk_io_channel_iter *i, int status)
4946 : {
4947 3 : struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
4948 :
4949 3 : assert(ctx != NULL);
4950 3 : assert(ctx->desc != NULL);
4951 3 : assert(ctx->cb_fn != NULL);
4952 :
4953 3 : spdk_bdev_close(ctx->desc);
4954 :
4955 3 : ctx->cb_fn(ctx->cb_arg, status);
4956 :
4957 3 : free(ctx);
4958 3 : }
4959 :
4960 : static void
4961 2 : _bdev_nvme_set_preferred_path(struct spdk_io_channel_iter *i)
4962 : {
4963 2 : struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
4964 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4965 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4966 : struct nvme_io_path *io_path, *prev;
4967 :
4968 2 : prev = NULL;
4969 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
4970 3 : if (io_path->nvme_ns == ctx->nvme_ns) {
4971 2 : break;
4972 : }
4973 1 : prev = io_path;
4974 : }
4975 :
4976 2 : if (io_path != NULL) {
4977 2 : if (prev != NULL) {
4978 1 : STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
4979 1 : STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
4980 : }
4981 :
4982 : /* We can set io_path to nbdev_ch->current_io_path directly here.
4983 : * However, it needs to be conditional. To simplify the code,
4984 : * just clear nbdev_ch->current_io_path and let find_io_path()
4985 : * fill it.
4986 : *
4987 : * Automatic failback may be disabled. Hence even if the io_path is
4988 : * already at the head, clear nbdev_ch->current_io_path.
4989 : */
4990 2 : bdev_nvme_clear_current_io_path(nbdev_ch);
4991 : }
4992 :
4993 2 : spdk_for_each_channel_continue(i, 0);
4994 2 : }
4995 :
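 : /* Find the namespace whose controller has the given cntlid and move it to the head of the bdev's namespace list. */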
4996 : static struct nvme_ns *
4997 3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
4998 : {
4999 : struct nvme_ns *nvme_ns, *prev;
5000 : const struct spdk_nvme_ctrlr_data *cdata;
5001 :
5002 3 : prev = NULL;
5003 6 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
5004 6 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
5005 :
5006 6 : if (cdata->cntlid == cntlid) {
5007 3 : break;
5008 : }
5009 3 : prev = nvme_ns;
5010 : }
5011 :
5012 3 : if (nvme_ns != NULL && prev != NULL) {
5013 2 : TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
5014 2 : TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
5015 : }
5016 :
5017 3 : return nvme_ns;
5018 : }
5019 :
5020 : /* This function supports only multipath mode. There is only a single I/O path
5021 : * for each NVMe-oF controller. Hence, just move the matched I/O path to the
5022 : * head of the I/O path list for each NVMe bdev channel.
5023 : *
5024 :  * An NVMe bdev channel may be acquired after this function completes, so move the
5025 :  * matched namespace to the head of the namespace list for the NVMe bdev as well.
5026 : */
5027 : void
5028 3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
5029 : bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
5030 : {
5031 : struct bdev_nvme_set_preferred_path_ctx *ctx;
5032 : struct spdk_bdev *bdev;
5033 : struct nvme_bdev *nbdev;
5034 3 : int rc = 0;
5035 :
5036 3 : assert(cb_fn != NULL);
5037 :
5038 3 : ctx = calloc(1, sizeof(*ctx));
5039 3 : if (ctx == NULL) {
5040 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5041 0 : rc = -ENOMEM;
5042 0 : goto err_alloc;
5043 : }
5044 :
5045 3 : ctx->cb_fn = cb_fn;
5046 3 : ctx->cb_arg = cb_arg;
5047 :
5048 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5049 3 : if (rc != 0) {
5050 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5051 0 : goto err_open;
5052 : }
5053 :
5054 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5055 :
5056 3 : if (bdev->module != &nvme_if) {
5057 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5058 0 : rc = -ENODEV;
5059 0 : goto err_bdev;
5060 : }
5061 :
5062 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5063 :
5064 3 : pthread_mutex_lock(&nbdev->mutex);
5065 :
5066 3 : ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
5067 3 : if (ctx->nvme_ns == NULL) {
5068 0 : pthread_mutex_unlock(&nbdev->mutex);
5069 :
5070 0 : SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
5071 0 : rc = -ENODEV;
5072 0 : goto err_bdev;
5073 : }
5074 :
5075 3 : pthread_mutex_unlock(&nbdev->mutex);
5076 :
5077 3 : spdk_for_each_channel(nbdev,
5078 : _bdev_nvme_set_preferred_path,
5079 : ctx,
5080 : bdev_nvme_set_preferred_path_done);
5081 3 : return;
5082 :
5083 0 : err_bdev:
5084 0 : spdk_bdev_close(ctx->desc);
5085 0 : err_open:
5086 0 : free(ctx);
5087 0 : err_alloc:
5088 0 : cb_fn(cb_arg, rc);
5089 : }
5090 :
5091 : struct bdev_nvme_set_multipath_policy_ctx {
5092 : struct spdk_bdev_desc *desc;
5093 : bdev_nvme_set_multipath_policy_cb cb_fn;
5094 : void *cb_arg;
5095 : };
5096 :
5097 : static void
5098 3 : bdev_nvme_set_multipath_policy_done(struct spdk_io_channel_iter *i, int status)
5099 : {
5100 3 : struct bdev_nvme_set_multipath_policy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
5101 :
5102 3 : assert(ctx != NULL);
5103 3 : assert(ctx->desc != NULL);
5104 3 : assert(ctx->cb_fn != NULL);
5105 :
5106 3 : spdk_bdev_close(ctx->desc);
5107 :
5108 3 : ctx->cb_fn(ctx->cb_arg, status);
5109 :
5110 3 : free(ctx);
5111 3 : }
5112 :
5113 : static void
5114 1 : _bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i)
5115 : {
5116 1 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
5117 1 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
5118 1 : struct nvme_bdev *nbdev = spdk_io_channel_get_io_device(_ch);
5119 :
5120 1 : nbdev_ch->mp_policy = nbdev->mp_policy;
5121 1 : nbdev_ch->mp_selector = nbdev->mp_selector;
5122 1 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
5123 1 : bdev_nvme_clear_current_io_path(nbdev_ch);
5124 :
5125 1 : spdk_for_each_channel_continue(i, 0);
5126 1 : }
5127 :
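 : /* Update the multipath policy, selector, and round-robin threshold for the named bdev and propagate the new values to every nvme_bdev_channel. */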
5128 : void
5129 3 : bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy policy,
5130 : enum bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
5131 : bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
5132 : {
5133 : struct bdev_nvme_set_multipath_policy_ctx *ctx;
5134 : struct spdk_bdev *bdev;
5135 : struct nvme_bdev *nbdev;
5136 : int rc;
5137 :
5138 3 : assert(cb_fn != NULL);
5139 :
5140 3 : switch (policy) {
5141 1 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
5142 1 : break;
5143 2 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
5144 : switch (selector) {
5145 1 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
5146 1 : if (rr_min_io == UINT32_MAX) {
5147 0 : rr_min_io = 1;
5148 1 : } else if (rr_min_io == 0) {
5149 0 : rc = -EINVAL;
5150 0 : goto exit;
5151 : }
5152 1 : break;
5153 1 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
5154 1 : break;
5155 0 : default:
5156 0 : rc = -EINVAL;
5157 0 : goto exit;
5158 : }
5159 2 : break;
5160 0 : default:
5161 0 : rc = -EINVAL;
5162 0 : goto exit;
5163 : }
5164 :
5165 3 : ctx = calloc(1, sizeof(*ctx));
5166 3 : if (ctx == NULL) {
5167 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5168 0 : rc = -ENOMEM;
5169 0 : goto exit;
5170 : }
5171 :
5172 3 : ctx->cb_fn = cb_fn;
5173 3 : ctx->cb_arg = cb_arg;
5174 :
5175 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5176 3 : if (rc != 0) {
5177 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5178 0 : rc = -ENODEV;
5179 0 : goto err_open;
5180 : }
5181 :
5182 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5183 3 : if (bdev->module != &nvme_if) {
5184 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5185 0 : rc = -ENODEV;
5186 0 : goto err_module;
5187 : }
5188 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5189 :
5190 3 : pthread_mutex_lock(&nbdev->mutex);
5191 3 : nbdev->mp_policy = policy;
5192 3 : nbdev->mp_selector = selector;
5193 3 : nbdev->rr_min_io = rr_min_io;
5194 3 : pthread_mutex_unlock(&nbdev->mutex);
5195 :
5196 3 : spdk_for_each_channel(nbdev,
5197 : _bdev_nvme_set_multipath_policy,
5198 : ctx,
5199 : bdev_nvme_set_multipath_policy_done);
5200 3 : return;
5201 :
5202 0 : err_module:
5203 0 : spdk_bdev_close(ctx->desc);
5204 0 : err_open:
5205 0 : free(ctx);
5206 0 : exit:
5207 0 : cb_fn(cb_arg, rc);
5208 : }
5209 :
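     : /* Asynchronous Event Request completion handler. A Namespace Attribute Changed
     :  * notice triggers namespace (re)population; an ANA Change notice triggers a
     :  * fresh read of the ANA log page.
     :  */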
5210 : static void
5211 3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
5212 : {
5213 3 : struct nvme_ctrlr *nvme_ctrlr = arg;
5214 : union spdk_nvme_async_event_completion event;
5215 :
5216 3 : if (spdk_nvme_cpl_is_error(cpl)) {
5217 0 : SPDK_WARNLOG("AER request execute failed\n");
5218 0 : return;
5219 : }
5220 :
5221 3 : event.raw = cpl->cdw0;
5222 3 : if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5223 3 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
5224 2 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
5225 1 : } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5226 1 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
5227 1 : nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
5228 : }
5229 : }
5230 :
5231 : static void
5232 51 : free_nvme_async_probe_ctx(struct nvme_async_probe_ctx *ctx)
5233 : {
5234 51 : spdk_keyring_put_key(ctx->drv_opts.tls_psk);
5235 51 : spdk_keyring_put_key(ctx->drv_opts.dhchap_key);
5236 51 : spdk_keyring_put_key(ctx->drv_opts.dhchap_ctrlr_key);
5237 51 : free(ctx);
5238 51 : }
5239 :
5240 : static void
5241 51 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
5242 : {
5243 51 : if (ctx->cb_fn) {
5244 51 : ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
5245 : }
5246 :
5247 51 : ctx->namespaces_populated = true;
5248 51 : if (ctx->probe_done) {
5249 : /* The probe was already completed, so we need to free the context
5250 : * here. This can happen for cases like OCSSD, where we need to
5251 : * send additional commands to the SSD after attach.
5252 : */
5253 31 : free_nvme_async_probe_ctx(ctx);
5254 : }
5255 51 : }
5256 :
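     : /* Periodic poller that detects hot-removed PCIe controllers via
     :  * spdk_nvme_scan_attached(). It unregisters itself once no NVMe bdev
     :  * controllers remain.
     :  */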
5257 : static int
5258 18 : bdev_nvme_remove_poller(void *ctx)
5259 : {
5260 18 : struct spdk_nvme_transport_id trid_pcie;
5261 :
5262 18 : if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
5263 1 : spdk_poller_unregister(&g_hotplug_poller);
5264 1 : return SPDK_POLLER_IDLE;
5265 : }
5266 :
5267 17 : memset(&trid_pcie, 0, sizeof(trid_pcie));
5268 17 : spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
5269 :
5270 17 : if (spdk_nvme_scan_attached(&trid_pcie)) {
5271 0 : SPDK_ERRLOG_RATELIMIT("spdk_nvme_scan_attached() failed\n");
5272 : }
5273 :
5274 17 : return SPDK_POLLER_BUSY;
5275 : }
5276 :
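     : /* Final step of controller creation: register the per-controller I/O device,
     :  * populate its namespaces, and make sure the hot-remove poller is running.
     :  */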
5277 : static void
5278 59 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
5279 : struct nvme_async_probe_ctx *ctx)
5280 : {
5281 59 : spdk_io_device_register(nvme_ctrlr,
5282 : bdev_nvme_create_ctrlr_channel_cb,
5283 : bdev_nvme_destroy_ctrlr_channel_cb,
5284 : sizeof(struct nvme_ctrlr_channel),
5285 59 : nvme_ctrlr->nbdev_ctrlr->name);
5286 :
5287 59 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
5288 :
5289 59 : if (g_hotplug_poller == NULL) {
5290 2 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
5291 : NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
5292 : }
5293 59 : }
5294 :
5295 : static void
5296 30 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
5297 : {
5298 30 : struct nvme_ctrlr *nvme_ctrlr = _ctx;
5299 30 : struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
5300 :
5301 30 : nvme_ctrlr->probe_ctx = NULL;
5302 :
5303 30 : if (spdk_nvme_cpl_is_error(cpl)) {
5304 0 : nvme_ctrlr_delete(nvme_ctrlr);
5305 :
5306 0 : if (ctx != NULL) {
5307 0 : ctx->reported_bdevs = 0;
5308 0 : populate_namespaces_cb(ctx, -1);
5309 : }
5310 0 : return;
5311 : }
5312 :
5313 30 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5314 : }
5315 :
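     : /* Allocate the ANA log page buffer, sized for the maximum ANA group and
     :  * namespace counts reported by the controller, and issue an asynchronous
     :  * Get Log Page command. Controller creation continues in the completion
     :  * callback above.
     :  */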
5316 : static int
5317 30 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
5318 : struct nvme_async_probe_ctx *ctx)
5319 : {
5320 30 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5321 : const struct spdk_nvme_ctrlr_data *cdata;
5322 : uint32_t ana_log_page_size;
5323 :
5324 30 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5325 :
5326 : /* Set the buffer size large enough to cover the maximum number of allowed namespaces. */
5327 30 : ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
5328 30 : sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
5329 : sizeof(uint32_t);
5330 :
5331 30 : nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
5332 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5333 30 : if (nvme_ctrlr->ana_log_page == NULL) {
5334 0 : SPDK_ERRLOG("could not allocate ANA log page buffer\n");
5335 0 : return -ENXIO;
5336 : }
5337 :
5338 : /* Each descriptor in an ANA log page is not guaranteed to be 8-byte aligned.
5339 : * Hence copy each descriptor to a temporary area when parsing it.
5340 : *
5341 : * Allocate a buffer as large as the ANA log page buffer because
5342 : * we do not know the size of a descriptor until actually reading it.
5343 : */
5344 30 : nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
5345 30 : if (nvme_ctrlr->copied_ana_desc == NULL) {
5346 0 : SPDK_ERRLOG("could not allocate a buffer to parse ANA descriptor\n");
5347 0 : return -ENOMEM;
5348 : }
5349 :
5350 30 : nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
5351 :
5352 30 : nvme_ctrlr->probe_ctx = ctx;
5353 :
5354 : /* Then set the read size to include only the currently active namespaces. */
5355 30 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
5356 :
5357 30 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
5358 0 : SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
5359 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
5360 0 : return -EINVAL;
5361 : }
5362 :
5363 30 : return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
5364 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
5365 : SPDK_NVME_GLOBAL_NS_TAG,
5366 30 : nvme_ctrlr->ana_log_page,
5367 : ana_log_page_size, 0,
5368 : nvme_ctrlr_init_ana_log_page_done,
5369 : nvme_ctrlr);
5370 : }
5371 :
5372 : /* hostnqn and subnqn were already verified before attaching a controller.
5373 : * Hence check only the multipath capability and cntlid here.
5374 : */
5375 : static bool
5376 16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
5377 : {
5378 : struct nvme_ctrlr *tmp;
5379 : const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
5380 :
5381 16 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5382 :
5383 16 : if (!cdata->cmic.multi_ctrlr) {
5384 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5385 0 : return false;
5386 : }
5387 :
5388 33 : TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
5389 18 : tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
5390 :
5391 18 : if (!tmp_cdata->cmic.multi_ctrlr) {
5392 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5393 0 : return false;
5394 : }
5395 18 : if (cdata->cntlid == tmp_cdata->cntlid) {
5396 1 : SPDK_ERRLOG("cntlid %u are duplicated.\n", tmp_cdata->cntlid);
5397 1 : return false;
5398 : }
5399 : }
5400 :
5401 15 : return true;
5402 : }
5403 :
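     : /* Look up the nvme_bdev_ctrlr with the given name, creating it on first use,
     :  * and link this nvme_ctrlr into it. Adding another controller to an existing
     :  * name must pass the multipath checks above.
     :  */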
5404 : static int
5405 60 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
5406 : {
5407 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
5408 60 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5409 60 : int rc = 0;
5410 :
5411 60 : pthread_mutex_lock(&g_bdev_nvme_mutex);
5412 :
5413 60 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
5414 60 : if (nbdev_ctrlr != NULL) {
5415 16 : if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
5416 1 : rc = -EINVAL;
5417 1 : goto exit;
5418 : }
5419 : } else {
5420 44 : nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
5421 44 : if (nbdev_ctrlr == NULL) {
5422 0 : SPDK_ERRLOG("Failed to allocate nvme_bdev_ctrlr.\n");
5423 0 : rc = -ENOMEM;
5424 0 : goto exit;
5425 : }
5426 44 : nbdev_ctrlr->name = strdup(name);
5427 44 : if (nbdev_ctrlr->name == NULL) {
5428 0 : SPDK_ERRLOG("Failed to allocate name of nvme_bdev_ctrlr.\n");
5429 0 : free(nbdev_ctrlr);
5430 0 : goto exit;
5431 : }
5432 44 : TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
5433 44 : TAILQ_INIT(&nbdev_ctrlr->bdevs);
5434 44 : TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
5435 : }
5436 59 : nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
5437 59 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
5438 60 : exit:
5439 60 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
5440 60 : return rc;
5441 : }
5442 :
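     : /* Allocate and initialize an nvme_ctrlr for a newly attached SPDK NVMe
     :  * controller: take references to any TLS/DH-HMAC-CHAP keys, record the initial
     :  * path, register the admin queue poller and the AER/remove/timeout callbacks,
     :  * and attach the controller to (or create) the named nvme_bdev_ctrlr. If ANA
     :  * reporting is supported, namespaces are populated only after the ANA log page
     :  * has been read.
     :  */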
5443 : static int
5444 60 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
5445 : const char *name,
5446 : const struct spdk_nvme_transport_id *trid,
5447 : struct nvme_async_probe_ctx *ctx)
5448 : {
5449 : struct nvme_ctrlr *nvme_ctrlr;
5450 : struct nvme_path_id *path_id;
5451 : const struct spdk_nvme_ctrlr_data *cdata;
5452 : int rc;
5453 :
5454 60 : nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
5455 60 : if (nvme_ctrlr == NULL) {
5456 0 : SPDK_ERRLOG("Failed to allocate device struct\n");
5457 0 : return -ENOMEM;
5458 : }
5459 :
5460 60 : rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
5461 60 : if (rc != 0) {
5462 0 : free(nvme_ctrlr);
5463 0 : return rc;
5464 : }
5465 :
5466 60 : TAILQ_INIT(&nvme_ctrlr->trids);
5467 60 : RB_INIT(&nvme_ctrlr->namespaces);
5468 :
5469 : /* Get another reference to the key, so the first one can be released from probe_ctx */
5470 60 : if (ctx != NULL) {
5471 46 : if (ctx->drv_opts.tls_psk != NULL) {
5472 0 : nvme_ctrlr->psk = spdk_keyring_get_key(
5473 : spdk_key_get_name(ctx->drv_opts.tls_psk));
5474 0 : if (nvme_ctrlr->psk == NULL) {
5475 : /* Could only happen if the key was removed in the meantime */
5476 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5477 : spdk_key_get_name(ctx->drv_opts.tls_psk));
5478 0 : rc = -ENOKEY;
5479 0 : goto err;
5480 : }
5481 : }
5482 :
5483 46 : if (ctx->drv_opts.dhchap_key != NULL) {
5484 0 : nvme_ctrlr->dhchap_key = spdk_keyring_get_key(
5485 : spdk_key_get_name(ctx->drv_opts.dhchap_key));
5486 0 : if (nvme_ctrlr->dhchap_key == NULL) {
5487 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5488 : spdk_key_get_name(ctx->drv_opts.dhchap_key));
5489 0 : rc = -ENOKEY;
5490 0 : goto err;
5491 : }
5492 : }
5493 :
5494 46 : if (ctx->drv_opts.dhchap_ctrlr_key != NULL) {
5495 0 : nvme_ctrlr->dhchap_ctrlr_key =
5496 0 : spdk_keyring_get_key(
5497 : spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
5498 0 : if (nvme_ctrlr->dhchap_ctrlr_key == NULL) {
5499 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5500 : spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
5501 0 : rc = -ENOKEY;
5502 0 : goto err;
5503 : }
5504 : }
5505 : }
5506 :
5507 60 : path_id = calloc(1, sizeof(*path_id));
5508 60 : if (path_id == NULL) {
5509 0 : SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
5510 0 : rc = -ENOMEM;
5511 0 : goto err;
5512 : }
5513 :
5514 60 : path_id->trid = *trid;
5515 60 : if (ctx != NULL) {
5516 46 : memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
5517 46 : memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
5518 : }
5519 60 : nvme_ctrlr->active_path_id = path_id;
5520 60 : TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
5521 :
5522 60 : nvme_ctrlr->thread = spdk_get_thread();
5523 60 : nvme_ctrlr->ctrlr = ctrlr;
5524 60 : nvme_ctrlr->ref = 1;
5525 :
5526 60 : if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
5527 0 : SPDK_ERRLOG("OCSSDs are not supported");
5528 0 : rc = -ENOTSUP;
5529 0 : goto err;
5530 : }
5531 :
5532 60 : if (ctx != NULL) {
5533 46 : memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
5534 : } else {
5535 14 : bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
5536 : }
5537 :
5538 60 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
5539 : g_opts.nvme_adminq_poll_period_us);
5540 :
5541 60 : if (g_opts.timeout_us > 0) {
5542 : /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
5543 : /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
5544 0 : uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
5545 0 : g_opts.timeout_us : g_opts.timeout_admin_us;
5546 0 : spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
5547 : adm_timeout_us, timeout_cb, nvme_ctrlr);
5548 : }
5549 :
5550 60 : spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
5551 60 : spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
5552 :
5553 60 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
5554 : SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
5555 0 : nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
5556 : }
5557 :
5558 60 : rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
5559 60 : if (rc != 0) {
5560 1 : goto err;
5561 : }
5562 :
5563 59 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5564 :
5565 59 : if (cdata->cmic.ana_reporting) {
5566 30 : rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
5567 30 : if (rc == 0) {
5568 30 : return 0;
5569 : }
5570 : } else {
5571 29 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5572 29 : return 0;
5573 : }
5574 :
5575 1 : err:
5576 1 : nvme_ctrlr_delete(nvme_ctrlr);
5577 1 : return rc;
5578 : }
5579 :
5580 : void
5581 56 : bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts)
5582 : {
5583 56 : opts->prchk_flags = 0;
5584 56 : opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
5585 56 : opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
5586 56 : opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
5587 56 : }
5588 :
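     : /* Attach callback for the PCIe hotplug probe: a newly inserted controller is
     :  * given a generated HotInNvme<index> name and a new nvme_ctrlr is created for it.
     :  */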
5589 : static void
5590 0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
5591 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
5592 : {
5593 : char *name;
5594 :
5595 0 : name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
5596 0 : if (!name) {
5597 0 : SPDK_ERRLOG("Failed to assign name to NVMe device\n");
5598 0 : return;
5599 : }
5600 :
5601 0 : if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
5602 0 : SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
5603 : } else {
5604 0 : SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
5605 : }
5606 :
5607 0 : free(name);
5608 : }
5609 :
5610 : static void
5611 59 : _nvme_ctrlr_destruct(void *ctx)
5612 : {
5613 59 : struct nvme_ctrlr *nvme_ctrlr = ctx;
5614 :
5615 59 : nvme_ctrlr_depopulate_namespaces(nvme_ctrlr);
5616 59 : nvme_ctrlr_release(nvme_ctrlr);
5617 59 : }
5618 :
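     : /* Mark the controller for destruction; the caller must hold nvme_ctrlr->mutex.
     :  * For user-requested (non-hotplug) deletions of PCIe controllers, the trid is
     :  * also added to the skip list so the hotplug monitor does not re-attach the
     :  * device.
     :  */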
5619 : static int
5620 56 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
5621 : {
5622 : struct nvme_probe_skip_entry *entry;
5623 :
5624 : /* The controller's destruction was already started */
5625 56 : if (nvme_ctrlr->destruct) {
5626 0 : return -EALREADY;
5627 : }
5628 :
5629 56 : if (!hotplug &&
5630 56 : nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
5631 0 : entry = calloc(1, sizeof(*entry));
5632 0 : if (!entry) {
5633 0 : return -ENOMEM;
5634 : }
5635 0 : entry->trid = nvme_ctrlr->active_path_id->trid;
5636 0 : TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
5637 : }
5638 :
5639 56 : nvme_ctrlr->destruct = true;
5640 56 : return 0;
5641 : }
5642 :
5643 : static int
5644 2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
5645 : {
5646 : int rc;
5647 :
5648 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5649 2 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
5650 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5651 :
5652 2 : if (rc == 0) {
5653 2 : _nvme_ctrlr_destruct(nvme_ctrlr);
5654 0 : } else if (rc == -EALREADY) {
5655 0 : rc = 0;
5656 : }
5657 :
5658 2 : return rc;
5659 : }
5660 :
5661 : static void
5662 0 : remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
5663 : {
5664 0 : struct nvme_ctrlr *nvme_ctrlr = cb_ctx;
5665 :
5666 0 : bdev_nvme_delete_ctrlr(nvme_ctrlr, true);
5667 0 : }
5668 :
5669 : static int
5670 0 : bdev_nvme_hotplug_probe(void *arg)
5671 : {
5672 0 : if (g_hotplug_probe_ctx == NULL) {
5673 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
5674 0 : return SPDK_POLLER_IDLE;
5675 : }
5676 :
5677 0 : if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
5678 0 : g_hotplug_probe_ctx = NULL;
5679 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
5680 : }
5681 :
5682 0 : return SPDK_POLLER_BUSY;
5683 : }
5684 :
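     : /* Hotplug poller: start an asynchronous PCIe probe unless one is already in
     :  * progress; its completion is then driven by the bdev_nvme_hotplug_probe
     :  * poller above.
     :  */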
5685 : static int
5686 0 : bdev_nvme_hotplug(void *arg)
5687 : {
5688 0 : struct spdk_nvme_transport_id trid_pcie;
5689 :
5690 0 : if (g_hotplug_probe_ctx) {
5691 0 : return SPDK_POLLER_BUSY;
5692 : }
5693 :
5694 0 : memset(&trid_pcie, 0, sizeof(trid_pcie));
5695 0 : spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
5696 :
5697 0 : g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
5698 : hotplug_probe_cb, attach_cb, NULL);
5699 :
5700 0 : if (g_hotplug_probe_ctx) {
5701 0 : assert(g_hotplug_probe_poller == NULL);
5702 0 : g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
5703 : }
5704 :
5705 0 : return SPDK_POLLER_BUSY;
5706 : }
5707 :
5708 : void
5709 0 : bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
5710 : {
5711 0 : *opts = g_opts;
5712 0 : }
5713 :
5714 : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
5715 : uint32_t reconnect_delay_sec,
5716 : uint32_t fast_io_fail_timeout_sec);
5717 :
5718 : static int
5719 0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
5720 : {
5721 0 : if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
5722 : /* Can't set timeout_admin_us without also setting timeout_us */
5723 0 : SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
5724 0 : return -EINVAL;
5725 : }
5726 :
5727 0 : if (opts->bdev_retry_count < -1) {
5728 0 : SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
5729 0 : return -EINVAL;
5730 : }
5731 :
5732 0 : if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
5733 0 : opts->reconnect_delay_sec,
5734 0 : opts->fast_io_fail_timeout_sec)) {
5735 0 : return -EINVAL;
5736 : }
5737 :
5738 0 : return 0;
5739 : }
5740 :
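     : /* Apply new module-level options. The update is rejected with -EPERM once any
     :  * NVMe bdev controllers exist, and RDMA-related values are pushed down to the
     :  * NVMe transport layer before g_opts is overwritten.
     :  */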
5741 : int
5742 0 : bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
5743 : {
5744 : int ret;
5745 :
5746 0 : ret = bdev_nvme_validate_opts(opts);
5747 0 : if (ret) {
5748 0 : SPDK_WARNLOG("Failed to set nvme opts.\n");
5749 0 : return ret;
5750 : }
5751 :
5752 0 : if (g_bdev_nvme_init_thread != NULL) {
5753 0 : if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
5754 0 : return -EPERM;
5755 : }
5756 : }
5757 :
5758 0 : if (opts->rdma_srq_size != 0 ||
5759 0 : opts->rdma_max_cq_size != 0 ||
5760 0 : opts->rdma_cm_event_timeout_ms != 0) {
5761 0 : struct spdk_nvme_transport_opts drv_opts;
5762 :
5763 0 : spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
5764 0 : if (opts->rdma_srq_size != 0) {
5765 0 : drv_opts.rdma_srq_size = opts->rdma_srq_size;
5766 : }
5767 0 : if (opts->rdma_max_cq_size != 0) {
5768 0 : drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
5769 : }
5770 0 : if (opts->rdma_cm_event_timeout_ms != 0) {
5771 0 : drv_opts.rdma_cm_event_timeout_ms = opts->rdma_cm_event_timeout_ms;
5772 : }
5773 :
5774 0 : ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
5775 0 : if (ret) {
5776 0 : SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
5777 0 : return ret;
5778 : }
5779 : }
5780 :
5781 0 : g_opts = *opts;
5782 :
5783 0 : return 0;
5784 : }
5785 :
5786 : struct set_nvme_hotplug_ctx {
5787 : uint64_t period_us;
5788 : bool enabled;
5789 : spdk_msg_fn fn;
5790 : void *fn_ctx;
5791 : };
5792 :
5793 : static void
5794 0 : set_nvme_hotplug_period_cb(void *_ctx)
5795 : {
5796 0 : struct set_nvme_hotplug_ctx *ctx = _ctx;
5797 :
5798 0 : spdk_poller_unregister(&g_hotplug_poller);
5799 0 : if (ctx->enabled) {
5800 0 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
5801 : } else {
5802 0 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
5803 : NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
5804 : }
5805 :
5806 0 : g_nvme_hotplug_poll_period_us = ctx->period_us;
5807 0 : g_nvme_hotplug_enabled = ctx->enabled;
5808 0 : if (ctx->fn) {
5809 0 : ctx->fn(ctx->fn_ctx);
5810 : }
5811 :
5812 0 : free(ctx);
5813 0 : }
5814 :
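     : /* Enable or disable PCIe hotplug monitoring. Only the primary process may
     :  * enable it. A zero period selects NVME_HOTPLUG_POLL_PERIOD_DEFAULT, the value
     :  * is capped at NVME_HOTPLUG_POLL_PERIOD_MAX, and the change is applied on the
     :  * module's init thread.
     :  */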
5815 : int
5816 0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
5817 : {
5818 : struct set_nvme_hotplug_ctx *ctx;
5819 :
5820 0 : if (enabled == true && !spdk_process_is_primary()) {
5821 0 : return -EPERM;
5822 : }
5823 :
5824 0 : ctx = calloc(1, sizeof(*ctx));
5825 0 : if (ctx == NULL) {
5826 0 : return -ENOMEM;
5827 : }
5828 :
5829 0 : period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
5830 0 : ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
5831 0 : ctx->enabled = enabled;
5832 0 : ctx->fn = cb;
5833 0 : ctx->fn_ctx = cb_ctx;
5834 :
5835 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
5836 0 : return 0;
5837 : }
5838 :
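     : /* Copy the names of the bdevs created for this controller into ctx->names,
     :  * up to ctx->max_bdevs entries, and complete the attach request through
     :  * populate_namespaces_cb().
     :  */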
5839 : static void
5840 45 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
5841 : struct nvme_async_probe_ctx *ctx)
5842 : {
5843 : struct nvme_ns *nvme_ns;
5844 : struct nvme_bdev *nvme_bdev;
5845 : size_t j;
5846 :
5847 45 : assert(nvme_ctrlr != NULL);
5848 :
5849 45 : if (ctx->names == NULL) {
5850 0 : ctx->reported_bdevs = 0;
5851 0 : populate_namespaces_cb(ctx, 0);
5852 0 : return;
5853 : }
5854 :
5855 : /*
5856 : * Report the new bdevs that were created in this call.
5857 : * There can be more than one bdev per NVMe controller.
5858 : */
5859 45 : j = 0;
5860 45 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5861 92 : while (nvme_ns != NULL) {
5862 47 : nvme_bdev = nvme_ns->bdev;
5863 47 : if (j < ctx->max_bdevs) {
5864 47 : ctx->names[j] = nvme_bdev->disk.name;
5865 47 : j++;
5866 : } else {
5867 0 : SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n",
5868 : ctx->max_bdevs);
5869 0 : ctx->reported_bdevs = 0;
5870 0 : populate_namespaces_cb(ctx, -ERANGE);
5871 0 : return;
5872 : }
5873 :
5874 47 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5875 : }
5876 :
5877 45 : ctx->reported_bdevs = j;
5878 45 : populate_namespaces_cb(ctx, 0);
5879 : }
5880 :
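     : /* Validate a candidate failover path for an existing nvme_ctrlr: PCIe failover
     :  * is not supported, the transport type and subsystem NQN must match the active
     :  * path, and a path that is already registered is reported as -EALREADY.
     :  */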
5881 : static int
5882 9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5883 : struct spdk_nvme_ctrlr *new_ctrlr,
5884 : struct spdk_nvme_transport_id *trid)
5885 : {
5886 : struct nvme_path_id *tmp_trid;
5887 :
5888 9 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
5889 0 : SPDK_ERRLOG("PCIe failover is not supported.\n");
5890 0 : return -ENOTSUP;
5891 : }
5892 :
5893 : /* Currently we only support failover to the same transport type. */
5894 9 : if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
5895 0 : SPDK_WARNLOG("Failover from trtype: %s to a different trtype: %s is not supported currently\n",
5896 : spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
5897 : spdk_nvme_transport_id_trtype_str(trid->trtype));
5898 0 : return -EINVAL;
5899 : }
5900 :
5901 :
5902 : /* Currently we only support failover to the same NQN. */
5903 9 : if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
5904 0 : SPDK_WARNLOG("Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
5905 : nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
5906 0 : return -EINVAL;
5907 : }
5908 :
5909 : /* Skip all the other checks if we've already registered this path. */
5910 21 : TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
5911 12 : if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
5912 0 : SPDK_WARNLOG("This path (traddr: %s subnqn: %s) is already registered\n", trid->traddr,
5913 : trid->subnqn);
5914 0 : return -EALREADY;
5915 : }
5916 : }
5917 :
5918 9 : return 0;
5919 : }
5920 :
5921 : static int
5922 9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
5923 : struct spdk_nvme_ctrlr *new_ctrlr)
5924 : {
5925 : struct nvme_ns *nvme_ns;
5926 : struct spdk_nvme_ns *new_ns;
5927 :
5928 9 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5929 9 : while (nvme_ns != NULL) {
5930 0 : new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
5931 0 : assert(new_ns != NULL);
5932 :
5933 0 : if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
5934 0 : return -EINVAL;
5935 : }
5936 :
5937 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5938 : }
5939 :
5940 9 : return 0;
5941 : }
5942 :
5943 : static int
5944 9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5945 : struct spdk_nvme_transport_id *trid)
5946 : {
5947 : struct nvme_path_id *active_id, *new_trid, *tmp_trid;
5948 :
5949 9 : new_trid = calloc(1, sizeof(*new_trid));
5950 9 : if (new_trid == NULL) {
5951 0 : return -ENOMEM;
5952 : }
5953 9 : new_trid->trid = *trid;
5954 :
5955 9 : active_id = nvme_ctrlr->active_path_id;
5956 9 : assert(active_id != NULL);
5957 9 : assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
5958 :
5959 : /* Skip the active trid not to replace it until it is failed. */
5960 9 : tmp_trid = TAILQ_NEXT(active_id, link);
5961 9 : if (tmp_trid == NULL) {
5962 6 : goto add_tail;
5963 : }
5964 :
5965 : /* A trid has failed if its last failed time is non-zero.
5966 : * Insert the new alternate trid before any failed trid.
5967 : */
5968 5 : TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
5969 3 : if (tmp_trid->last_failed_tsc != 0) {
5970 1 : TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
5971 1 : return 0;
5972 : }
5973 : }
5974 :
5975 2 : add_tail:
5976 8 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
5977 8 : return 0;
5978 : }
5979 :
5980 : /* This is the case that a secondary path is added to an existing
5981 : * nvme_ctrlr for failover. After checking if it can access the same
5982 : * namespaces as the primary path, it is disconnected until failover occurs.
5983 : */
5984 : static int
5985 9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5986 : struct spdk_nvme_ctrlr *new_ctrlr,
5987 : struct spdk_nvme_transport_id *trid)
5988 : {
5989 : int rc;
5990 :
5991 9 : assert(nvme_ctrlr != NULL);
5992 :
5993 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5994 :
5995 9 : rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
5996 9 : if (rc != 0) {
5997 0 : goto exit;
5998 : }
5999 :
6000 9 : rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
6001 9 : if (rc != 0) {
6002 0 : goto exit;
6003 : }
6004 :
6005 9 : rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
6006 :
6007 9 : exit:
6008 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6009 :
6010 9 : spdk_nvme_detach(new_ctrlr);
6011 :
6012 9 : return rc;
6013 : }
6014 :
6015 : static void
6016 46 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6017 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
6018 : {
6019 46 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6020 : struct nvme_async_probe_ctx *ctx;
6021 : int rc;
6022 :
6023 46 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
6024 46 : ctx->ctrlr_attached = true;
6025 :
6026 46 : rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
6027 46 : if (rc != 0) {
6028 1 : ctx->reported_bdevs = 0;
6029 1 : populate_namespaces_cb(ctx, rc);
6030 : }
6031 46 : }
6032 :
6033 : static void
6034 4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6035 : struct spdk_nvme_ctrlr *ctrlr,
6036 : const struct spdk_nvme_ctrlr_opts *opts)
6037 : {
6038 4 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6039 : struct nvme_ctrlr *nvme_ctrlr;
6040 : struct nvme_async_probe_ctx *ctx;
6041 : int rc;
6042 :
6043 4 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
6044 4 : ctx->ctrlr_attached = true;
6045 :
6046 4 : nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
6047 4 : if (nvme_ctrlr) {
6048 4 : rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
6049 : } else {
6050 0 : rc = -ENODEV;
6051 : }
6052 :
6053 4 : ctx->reported_bdevs = 0;
6054 4 : populate_namespaces_cb(ctx, rc);
6055 4 : }
6056 :
6057 : static int
6058 51 : bdev_nvme_async_poll(void *arg)
6059 : {
6060 51 : struct nvme_async_probe_ctx *ctx = arg;
6061 : int rc;
6062 :
6063 51 : rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
6064 51 : if (spdk_unlikely(rc != -EAGAIN)) {
6065 51 : ctx->probe_done = true;
6066 51 : spdk_poller_unregister(&ctx->poller);
6067 51 : if (!ctx->ctrlr_attached) {
6068 : /* The probe is done, but no controller was attached.
6069 : * That means we had a failure, so report -EIO back to
6070 : * the caller (usually the RPC). populate_namespaces_cb()
6071 : * will take care of freeing the nvme_async_probe_ctx.
6072 : */
6073 1 : ctx->reported_bdevs = 0;
6074 1 : populate_namespaces_cb(ctx, -EIO);
6075 50 : } else if (ctx->namespaces_populated) {
6076 : /* The namespaces for the attached controller were all
6077 : * populated and the response was already sent to the
6078 : * caller (usually the RPC). So free the context here.
6079 : */
6080 20 : free_nvme_async_probe_ctx(ctx);
6081 : }
6082 : }
6083 :
6084 51 : return SPDK_POLLER_BUSY;
6085 : }
6086 :
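     : /* Validate the combination of I/O error resiliency parameters:
     :  *   - ctrlr_loss_timeout_sec == -1 means reconnect forever; reconnect_delay_sec
     :  *     must be non-zero and a non-zero fast_io_fail_timeout_sec must be at least
     :  *     reconnect_delay_sec.
     :  *   - ctrlr_loss_timeout_sec == 0 disables reconnects; reconnect_delay_sec and
     :  *     fast_io_fail_timeout_sec must then also be 0.
     :  *   - Otherwise reconnect_delay_sec must be non-zero and no larger than
     :  *     ctrlr_loss_timeout_sec, and a non-zero fast_io_fail_timeout_sec must lie
     :  *     between reconnect_delay_sec and ctrlr_loss_timeout_sec.
     :  * For example, ctrlr_loss_timeout_sec=30, reconnect_delay_sec=5 and
     :  * fast_io_fail_timeout_sec=10 is an accepted combination.
     :  */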
6087 : static bool
6088 28 : bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
6089 : uint32_t reconnect_delay_sec,
6090 : uint32_t fast_io_fail_timeout_sec)
6091 : {
6092 28 : if (ctrlr_loss_timeout_sec < -1) {
6093 1 : SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
6094 1 : return false;
6095 27 : } else if (ctrlr_loss_timeout_sec == -1) {
6096 13 : if (reconnect_delay_sec == 0) {
6097 1 : SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
6098 1 : return false;
6099 12 : } else if (fast_io_fail_timeout_sec != 0 &&
6100 : fast_io_fail_timeout_sec < reconnect_delay_sec) {
6101 1 : SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io-fail_timeout_sec.\n");
6102 1 : return false;
6103 : }
6104 14 : } else if (ctrlr_loss_timeout_sec != 0) {
6105 11 : if (reconnect_delay_sec == 0) {
6106 1 : SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
6107 1 : return false;
6108 10 : } else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
6109 1 : SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
6110 1 : return false;
6111 9 : } else if (fast_io_fail_timeout_sec != 0) {
6112 6 : if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
6113 1 : SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
6114 1 : return false;
6115 5 : } else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
6116 1 : SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
6117 1 : return false;
6118 : }
6119 : }
6120 3 : } else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
6121 2 : SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
6122 2 : return false;
6123 : }
6124 :
6125 19 : return true;
6126 : }
6127 :
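     : /* Read a pre-shared key from a file into buf. The file must grant no
     :  * permissions beyond owner read/write (mode bits 0177 must be clear) and its
     :  * contents must fit within bufsz.
     :  */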
6128 : static int
6129 0 : bdev_nvme_load_psk(const char *fname, char *buf, size_t bufsz)
6130 : {
6131 : FILE *psk_file;
6132 0 : struct stat statbuf;
6133 : int rc;
6134 : #define TCP_PSK_INVALID_PERMISSIONS 0177
6135 :
6136 0 : if (stat(fname, &statbuf) != 0) {
6137 0 : SPDK_ERRLOG("Could not read permissions for PSK file\n");
6138 0 : return -EACCES;
6139 : }
6140 :
6141 0 : if ((statbuf.st_mode & TCP_PSK_INVALID_PERMISSIONS) != 0) {
6142 0 : SPDK_ERRLOG("Incorrect permissions for PSK file\n");
6143 0 : return -EPERM;
6144 : }
6145 0 : if ((size_t)statbuf.st_size >= bufsz) {
6146 0 : SPDK_ERRLOG("Invalid PSK: too long\n");
6147 0 : return -EINVAL;
6148 : }
6149 0 : psk_file = fopen(fname, "r");
6150 0 : if (psk_file == NULL) {
6151 0 : SPDK_ERRLOG("Could not open PSK file\n");
6152 0 : return -EINVAL;
6153 : }
6154 :
6155 0 : memset(buf, 0, bufsz);
6156 0 : rc = fread(buf, 1, statbuf.st_size, psk_file);
6157 0 : if (rc != statbuf.st_size) {
6158 0 : SPDK_ERRLOG("Failed to read PSK\n");
6159 0 : fclose(psk_file);
6160 0 : return -EINVAL;
6161 : }
6162 :
6163 0 : fclose(psk_file);
6164 0 : return 0;
6165 : }
6166 :
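     : /* Attach to the NVMe controller described by trid and create bdevs for its
     :  * active namespaces. The operation is asynchronous: cb_fn is invoked with the
     :  * number of created bdevs once namespace population finishes, and the names of
     :  * up to count bdevs are written into names[]. If a controller with the same
     :  * base_name already exists and multipath is false, the new path is added as a
     :  * failover path instead.
     :  *
     :  * Illustrative sketch of a caller (not part of this file; attach_done_cb and
     :  * the addresses below are hypothetical):
     :  *
     :  *   struct spdk_nvme_transport_id trid = {};
     :  *   struct spdk_nvme_ctrlr_opts drv_opts;
     :  *   const char *names[32];
     :  *
     :  *   spdk_nvme_ctrlr_get_default_ctrlr_opts(&drv_opts, sizeof(drv_opts));
     :  *   spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_TCP);
     :  *   trid.adrfam = SPDK_NVMF_ADRFAM_IPV4;
     :  *   snprintf(trid.traddr, sizeof(trid.traddr), "127.0.0.1");
     :  *   snprintf(trid.trsvcid, sizeof(trid.trsvcid), "4420");
     :  *   snprintf(trid.subnqn, sizeof(trid.subnqn), "nqn.2016-06.io.spdk:cnode1");
     :  *   bdev_nvme_create(&trid, "Nvme0", names, 32, attach_done_cb, NULL,
     :  *                    &drv_opts, NULL, false);
     :  */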
6167 : int
6168 51 : bdev_nvme_create(struct spdk_nvme_transport_id *trid,
6169 : const char *base_name,
6170 : const char **names,
6171 : uint32_t count,
6172 : spdk_bdev_create_nvme_fn cb_fn,
6173 : void *cb_ctx,
6174 : struct spdk_nvme_ctrlr_opts *drv_opts,
6175 : struct nvme_ctrlr_opts *bdev_opts,
6176 : bool multipath)
6177 : {
6178 : struct nvme_probe_skip_entry *entry, *tmp;
6179 : struct nvme_async_probe_ctx *ctx;
6180 : spdk_nvme_attach_cb attach_cb;
6181 : int rc, len;
6182 :
6183 : /* TODO expand this check to include both the host and target TRIDs.
6184 : * Only if both are the same should we fail.
6185 : */
6186 51 : if (nvme_ctrlr_get(trid, drv_opts->hostnqn) != NULL) {
6187 0 : SPDK_ERRLOG("A controller with the provided trid (traddr: %s, hostnqn: %s) "
6188 : "already exists.\n", trid->traddr, drv_opts->hostnqn);
6189 0 : return -EEXIST;
6190 : }
6191 :
6192 51 : len = strnlen(base_name, SPDK_CONTROLLER_NAME_MAX);
6193 :
6194 51 : if (len == 0 || len == SPDK_CONTROLLER_NAME_MAX) {
6195 0 : SPDK_ERRLOG("controller name must be between 1 and %d characters\n", SPDK_CONTROLLER_NAME_MAX - 1);
6196 0 : return -EINVAL;
6197 : }
6198 :
6199 51 : if (bdev_opts != NULL &&
6200 9 : !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
6201 : bdev_opts->reconnect_delay_sec,
6202 : bdev_opts->fast_io_fail_timeout_sec)) {
6203 0 : return -EINVAL;
6204 : }
6205 :
6206 51 : ctx = calloc(1, sizeof(*ctx));
6207 51 : if (!ctx) {
6208 0 : return -ENOMEM;
6209 : }
6210 51 : ctx->base_name = base_name;
6211 51 : ctx->names = names;
6212 51 : ctx->max_bdevs = count;
6213 51 : ctx->cb_fn = cb_fn;
6214 51 : ctx->cb_ctx = cb_ctx;
6215 51 : ctx->trid = *trid;
6216 :
6217 51 : if (bdev_opts) {
6218 9 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
6219 : } else {
6220 42 : bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
6221 : }
6222 :
6223 51 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
6224 0 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
6225 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
6226 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
6227 0 : free(entry);
6228 0 : break;
6229 : }
6230 : }
6231 : }
6232 :
6233 51 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
6234 51 : ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
6235 51 : ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
6236 51 : ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
6237 51 : ctx->drv_opts.disable_read_ana_log_page = true;
6238 51 : ctx->drv_opts.transport_tos = g_opts.transport_tos;
6239 :
6240 51 : if (ctx->bdev_opts.psk[0] != '\0') {
6241 : /* Try to use the keyring first */
6242 0 : ctx->drv_opts.tls_psk = spdk_keyring_get_key(ctx->bdev_opts.psk);
6243 0 : if (ctx->drv_opts.tls_psk == NULL) {
6244 0 : rc = bdev_nvme_load_psk(ctx->bdev_opts.psk,
6245 0 : ctx->drv_opts.psk, sizeof(ctx->drv_opts.psk));
6246 0 : if (rc != 0) {
6247 0 : SPDK_ERRLOG("Could not load PSK from %s\n", ctx->bdev_opts.psk);
6248 0 : free_nvme_async_probe_ctx(ctx);
6249 0 : return rc;
6250 : }
6251 : }
6252 : }
6253 :
6254 51 : if (ctx->bdev_opts.dhchap_key != NULL) {
6255 0 : ctx->drv_opts.dhchap_key = spdk_keyring_get_key(ctx->bdev_opts.dhchap_key);
6256 0 : if (ctx->drv_opts.dhchap_key == NULL) {
6257 0 : SPDK_ERRLOG("Could not load DH-HMAC-CHAP key: %s\n",
6258 : ctx->bdev_opts.dhchap_key);
6259 0 : free_nvme_async_probe_ctx(ctx);
6260 0 : return -ENOKEY;
6261 : }
6262 :
6263 0 : ctx->drv_opts.dhchap_digests = g_opts.dhchap_digests;
6264 0 : ctx->drv_opts.dhchap_dhgroups = g_opts.dhchap_dhgroups;
6265 : }
6266 51 : if (ctx->bdev_opts.dhchap_ctrlr_key != NULL) {
6267 0 : ctx->drv_opts.dhchap_ctrlr_key =
6268 0 : spdk_keyring_get_key(ctx->bdev_opts.dhchap_ctrlr_key);
6269 0 : if (ctx->drv_opts.dhchap_ctrlr_key == NULL) {
6270 0 : SPDK_ERRLOG("Could not load DH-HMAC-CHAP controller key: %s\n",
6271 : ctx->bdev_opts.dhchap_ctrlr_key);
6272 0 : free_nvme_async_probe_ctx(ctx);
6273 0 : return -ENOKEY;
6274 : }
6275 : }
6276 :
6277 51 : if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || multipath) {
6278 47 : attach_cb = connect_attach_cb;
6279 : } else {
6280 4 : attach_cb = connect_set_failover_cb;
6281 : }
6282 :
6283 51 : ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
6284 51 : if (ctx->probe_ctx == NULL) {
6285 0 : SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
6286 0 : free_nvme_async_probe_ctx(ctx);
6287 0 : return -ENODEV;
6288 : }
6289 51 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
6290 :
6291 51 : return 0;
6292 : }
6293 :
6294 : struct bdev_nvme_delete_ctx {
6295 : char *name;
6296 : struct nvme_path_id path_id;
6297 : bdev_nvme_delete_done_fn delete_done;
6298 : void *delete_done_ctx;
6299 : uint64_t timeout_ticks;
6300 : struct spdk_poller *poller;
6301 : };
6302 :
6303 : static void
6304 2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
6305 : {
6306 2 : if (ctx != NULL) {
6307 1 : free(ctx->name);
6308 1 : free(ctx);
6309 : }
6310 2 : }
6311 :
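     : /* Match a path against a user-supplied filter. Zeroed fields in path_id act as
     :  * wildcards, so an empty filter matches any path.
     :  */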
6312 : static bool
6313 74 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
6314 : {
6315 74 : if (path_id->trid.trtype != 0) {
6316 21 : if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
6317 0 : if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
6318 0 : return false;
6319 : }
6320 : } else {
6321 21 : if (path_id->trid.trtype != p->trid.trtype) {
6322 0 : return false;
6323 : }
6324 : }
6325 : }
6326 :
6327 74 : if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
6328 21 : if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
6329 11 : return false;
6330 : }
6331 : }
6332 :
6333 63 : if (path_id->trid.adrfam != 0) {
6334 0 : if (path_id->trid.adrfam != p->trid.adrfam) {
6335 0 : return false;
6336 : }
6337 : }
6338 :
6339 63 : if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
6340 10 : if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
6341 0 : return false;
6342 : }
6343 : }
6344 :
6345 63 : if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
6346 10 : if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
6347 0 : return false;
6348 : }
6349 : }
6350 :
6351 63 : if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
6352 0 : if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
6353 0 : return false;
6354 : }
6355 : }
6356 :
6357 63 : if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
6358 0 : if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
6359 0 : return false;
6360 : }
6361 : }
6362 :
6363 63 : return true;
6364 : }
6365 :
6366 : static bool
6367 2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
6368 : {
6369 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6370 : struct nvme_ctrlr *ctrlr;
6371 : struct nvme_path_id *p;
6372 :
6373 2 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6374 2 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6375 2 : if (!nbdev_ctrlr) {
6376 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6377 1 : return false;
6378 : }
6379 :
6380 1 : TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
6381 1 : pthread_mutex_lock(&ctrlr->mutex);
6382 1 : TAILQ_FOREACH(p, &ctrlr->trids, link) {
6383 1 : if (nvme_path_id_compare(p, path_id)) {
6384 1 : pthread_mutex_unlock(&ctrlr->mutex);
6385 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6386 1 : return true;
6387 : }
6388 : }
6389 0 : pthread_mutex_unlock(&ctrlr->mutex);
6390 : }
6391 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6392 :
6393 0 : return false;
6394 : }
6395 :
6396 : static int
6397 2 : bdev_nvme_delete_complete_poll(void *arg)
6398 : {
6399 2 : struct bdev_nvme_delete_ctx *ctx = arg;
6400 2 : int rc = 0;
6401 :
6402 2 : if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
6403 1 : if (ctx->timeout_ticks > spdk_get_ticks()) {
6404 1 : return SPDK_POLLER_BUSY;
6405 : }
6406 :
6407 0 : SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
6408 0 : rc = -ETIMEDOUT;
6409 : }
6410 :
6411 1 : spdk_poller_unregister(&ctx->poller);
6412 :
6413 1 : ctx->delete_done(ctx->delete_done_ctx, rc);
6414 1 : free_bdev_nvme_delete_ctx(ctx);
6415 :
6416 1 : return SPDK_POLLER_BUSY;
6417 : }
6418 :
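     : /* Remove every path of this nvme_ctrlr that matches path_id. Matching inactive
     :  * paths are freed immediately. If the active (first) path matches, the
     :  * controller is destructed when no alternate path remains, otherwise a failover
     :  * to an alternate path is triggered.
     :  */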
6419 : static int
6420 63 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
6421 : {
6422 : struct nvme_path_id *p, *t;
6423 : spdk_msg_fn msg_fn;
6424 63 : int rc = -ENXIO;
6425 :
6426 63 : pthread_mutex_lock(&nvme_ctrlr->mutex);
6427 :
6428 73 : TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
6429 73 : if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
6430 63 : break;
6431 : }
6432 :
6433 10 : if (!nvme_path_id_compare(p, path_id)) {
6434 3 : continue;
6435 : }
6436 :
6437 : /* This matching path is not the active one, so it can be removed right away. */
6438 7 : TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
6439 7 : free(p);
6440 7 : rc = 0;
6441 : }
6442 :
6443 63 : if (p == NULL || !nvme_path_id_compare(p, path_id)) {
6444 8 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6445 8 : return rc;
6446 : }
6447 :
6448 : /* If we made it here, then this path is a match! Now we need to remove it. */
6449 :
6450 : /* This is the active path in use right now. The active path is always the first in the list. */
6451 55 : assert(p == nvme_ctrlr->active_path_id);
6452 :
6453 55 : if (!TAILQ_NEXT(p, link)) {
6454 : /* The current path is the only path. */
6455 54 : msg_fn = _nvme_ctrlr_destruct;
6456 54 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
6457 : } else {
6458 : /* There is an alternative path. */
6459 1 : msg_fn = _bdev_nvme_reset_ctrlr;
6460 1 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
6461 : }
6462 :
6463 55 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6464 :
6465 55 : if (rc == 0) {
6466 55 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
6467 0 : } else if (rc == -EALREADY) {
6468 0 : rc = 0;
6469 : }
6470 :
6471 55 : return rc;
6472 : }
6473 :
6474 : int
6475 48 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
6476 : bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
6477 : {
6478 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6479 : struct nvme_ctrlr *nvme_ctrlr, *tmp_nvme_ctrlr;
6480 48 : struct bdev_nvme_delete_ctx *ctx = NULL;
6481 48 : int rc = -ENXIO, _rc;
6482 :
6483 48 : if (name == NULL || path_id == NULL) {
6484 0 : rc = -EINVAL;
6485 0 : goto exit;
6486 : }
6487 :
6488 48 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6489 :
6490 48 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6491 48 : if (nbdev_ctrlr == NULL) {
6492 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6493 :
6494 0 : SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
6495 0 : rc = -ENODEV;
6496 0 : goto exit;
6497 : }
6498 :
6499 111 : TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
6500 63 : _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
6501 63 : if (_rc < 0 && _rc != -ENXIO) {
6502 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6503 0 : rc = _rc;
6504 0 : goto exit;
6505 63 : } else if (_rc == 0) {
6506 : /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
6507 : * was deleted successfully. To remember the successful deletion,
6508 : * overwrite rc only if _rc is zero.
6509 : */
6510 57 : rc = 0;
6511 : }
6512 : }
6513 :
6514 48 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6515 :
6516 48 : if (rc != 0 || delete_done == NULL) {
6517 47 : goto exit;
6518 : }
6519 :
6520 1 : ctx = calloc(1, sizeof(*ctx));
6521 1 : if (ctx == NULL) {
6522 0 : SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
6523 0 : rc = -ENOMEM;
6524 0 : goto exit;
6525 : }
6526 :
6527 1 : ctx->name = strdup(name);
6528 1 : if (ctx->name == NULL) {
6529 0 : SPDK_ERRLOG("Failed to copy controller name for deletion\n");
6530 0 : rc = -ENOMEM;
6531 0 : goto exit;
6532 : }
6533 :
6534 1 : ctx->delete_done = delete_done;
6535 1 : ctx->delete_done_ctx = delete_done_ctx;
6536 1 : ctx->path_id = *path_id;
6537 1 : ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
6538 1 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
6539 1 : if (ctx->poller == NULL) {
6540 0 : SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
6541 0 : rc = -ENOMEM;
6542 0 : goto exit;
6543 : }
6544 :
6545 1 : exit:
6546 48 : if (rc != 0) {
6547 1 : free_bdev_nvme_delete_ctx(ctx);
6548 : }
6549 :
6550 48 : return rc;
6551 : }
6552 :
6553 : #define DISCOVERY_INFOLOG(ctx, format, ...) \
6554 : SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
6555 :
6556 : #define DISCOVERY_ERRLOG(ctx, format, ...) \
6557 : SPDK_ERRLOG("Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
6558 :
6559 : struct discovery_entry_ctx {
6560 : char name[128];
6561 : struct spdk_nvme_transport_id trid;
6562 : struct spdk_nvme_ctrlr_opts drv_opts;
6563 : struct spdk_nvmf_discovery_log_page_entry entry;
6564 : TAILQ_ENTRY(discovery_entry_ctx) tailq;
6565 : struct discovery_ctx *ctx;
6566 : };
6567 :
6568 : struct discovery_ctx {
6569 : char *name;
6570 : spdk_bdev_nvme_start_discovery_fn start_cb_fn;
6571 : spdk_bdev_nvme_stop_discovery_fn stop_cb_fn;
6572 : void *cb_ctx;
6573 : struct spdk_nvme_probe_ctx *probe_ctx;
6574 : struct spdk_nvme_detach_ctx *detach_ctx;
6575 : struct spdk_nvme_ctrlr *ctrlr;
6576 : struct spdk_nvme_transport_id trid;
6577 : struct discovery_entry_ctx *entry_ctx_in_use;
6578 : struct spdk_poller *poller;
6579 : struct spdk_nvme_ctrlr_opts drv_opts;
6580 : struct nvme_ctrlr_opts bdev_opts;
6581 : struct spdk_nvmf_discovery_log_page *log_page;
6582 : TAILQ_ENTRY(discovery_ctx) tailq;
6583 : TAILQ_HEAD(, discovery_entry_ctx) nvm_entry_ctxs;
6584 : TAILQ_HEAD(, discovery_entry_ctx) discovery_entry_ctxs;
6585 : int rc;
6586 : bool wait_for_attach;
6587 : uint64_t timeout_ticks;
6588 : /* Denotes that the discovery service is being started. We're waiting
6589 : * for the initial connection to the discovery controller to be
6590 : * established and for the discovered NVM ctrlrs to be attached.
6591 : */
6592 : bool initializing;
6593 : /* Denotes if a discovery is currently in progress for this context.
6594 : * That includes connecting to newly discovered subsystems. Used to
6595 : * ensure we do not start a new discovery until an existing one is
6596 : * complete.
6597 : */
6598 : bool in_progress;
6599 :
6600 : /* Denotes if another discovery is needed after the one in progress
6601 : * completes. Set when we receive an AER completion while a discovery
6602 : * is already in progress.
6603 : */
6604 : bool pending;
6605 :
6606 : /* Signal to the discovery context poller that it should stop the
6607 : * discovery service, including detaching from the current discovery
6608 : * controller.
6609 : */
6610 : bool stop;
6611 :
6612 : struct spdk_thread *calling_thread;
6613 : uint32_t index;
6614 : uint32_t attach_in_progress;
6615 : char *hostnqn;
6616 :
6617 : /* Denotes if the discovery service was started by the mdns discovery.
6618 : */
6619 : bool from_mdns_discovery_service;
6620 : };
6621 :
6622 : TAILQ_HEAD(discovery_ctxs, discovery_ctx);
6623 : static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
6624 :
6625 : static void get_discovery_log_page(struct discovery_ctx *ctx);
6626 :
6627 : static void
6628 0 : free_discovery_ctx(struct discovery_ctx *ctx)
6629 : {
6630 0 : free(ctx->log_page);
6631 0 : free(ctx->hostnqn);
6632 0 : free(ctx->name);
6633 0 : free(ctx);
6634 0 : }
6635 :
6636 : static void
6637 0 : discovery_complete(struct discovery_ctx *ctx)
6638 : {
6639 0 : ctx->initializing = false;
6640 0 : ctx->in_progress = false;
6641 0 : if (ctx->pending) {
6642 0 : ctx->pending = false;
6643 0 : get_discovery_log_page(ctx);
6644 : }
6645 0 : }
6646 :
6647 : static void
6648 0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
6649 : struct spdk_nvmf_discovery_log_page_entry *entry)
6650 : {
6651 : char *space;
6652 :
6653 0 : trid->trtype = entry->trtype;
6654 0 : trid->adrfam = entry->adrfam;
6655 0 : memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
6656 0 : memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
6657 : /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
6658 : * before call to this function trid->subnqn is zeroed out, we need
6659 : * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
6660 : * remains 0. Then we can shorten the string (replace ' ' with 0) if required
6661 : */
6662 0 : memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
6663 :
6664 : /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
6665 : * But the log page entries typically pad them with spaces, not zeroes.
6666 : * So add a NULL terminator to each of these fields at the appropriate
6667 : * location.
6668 : */
6669 0 : space = strchr(trid->traddr, ' ');
6670 0 : if (space) {
6671 0 : *space = 0;
6672 : }
6673 0 : space = strchr(trid->trsvcid, ' ');
6674 0 : if (space) {
6675 0 : *space = 0;
6676 : }
6677 0 : space = strchr(trid->subnqn, ' ');
6678 0 : if (space) {
6679 0 : *space = 0;
6680 : }
6681 0 : }
6682 :
6683 : static void
6684 0 : _stop_discovery(void *_ctx)
6685 : {
6686 0 : struct discovery_ctx *ctx = _ctx;
6687 :
6688 0 : if (ctx->attach_in_progress > 0) {
6689 0 : spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
6690 0 : return;
6691 : }
6692 :
6693 0 : ctx->stop = true;
6694 :
6695 0 : while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
6696 : struct discovery_entry_ctx *entry_ctx;
6697 0 : struct nvme_path_id path = {};
6698 :
6699 0 : entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
6700 0 : path.trid = entry_ctx->trid;
6701 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
6702 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
6703 0 : free(entry_ctx);
6704 : }
6705 :
6706 0 : while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
6707 : struct discovery_entry_ctx *entry_ctx;
6708 :
6709 0 : entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
6710 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
6711 0 : free(entry_ctx);
6712 : }
6713 :
6714 0 : free(ctx->entry_ctx_in_use);
6715 0 : ctx->entry_ctx_in_use = NULL;
6716 : }
6717 :
6718 : static void
6719 0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
6720 : {
6721 0 : ctx->stop_cb_fn = cb_fn;
6722 0 : ctx->cb_ctx = cb_ctx;
6723 :
6724 0 : if (ctx->attach_in_progress > 0) {
6725 0 : DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
6726 : ctx->attach_in_progress);
6727 : }
6728 :
6729 0 : _stop_discovery(ctx);
6730 0 : }
6731 :
6732 : static void
6733 2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
6734 : {
6735 : struct discovery_ctx *d_ctx;
6736 : struct nvme_path_id *path_id;
6737 2 : struct spdk_nvme_transport_id trid = {};
6738 : struct discovery_entry_ctx *entry_ctx, *tmp;
6739 :
6740 2 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
6741 :
6742 2 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
6743 0 : TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
6744 0 : build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
6745 0 : if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
6746 0 : continue;
6747 : }
6748 :
6749 0 : TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
6750 0 : free(entry_ctx);
6751 0 : DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
6752 : trid.subnqn, trid.traddr, trid.trsvcid);
6753 :
6754 : /* Fail discovery ctrlr to force reattach attempt */
6755 0 : spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
6756 : }
6757 : }
6758 2 : }
6759 :
6760 : static void
6761 0 : discovery_remove_controllers(struct discovery_ctx *ctx)
6762 : {
6763 0 : struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
6764 : struct discovery_entry_ctx *entry_ctx, *tmp;
6765 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
6766 0 : struct spdk_nvme_transport_id old_trid = {};
6767 : uint64_t numrec, i;
6768 : bool found;
6769 :
6770 0 : numrec = from_le64(&log_page->numrec);
6771 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
6772 0 : found = false;
6773 0 : old_entry = &entry_ctx->entry;
6774 0 : build_trid_from_log_page_entry(&old_trid, old_entry);
6775 0 : for (i = 0; i < numrec; i++) {
6776 0 : new_entry = &log_page->entries[i];
6777 0 : if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
6778 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
6779 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
6780 0 : found = true;
6781 0 : break;
6782 : }
6783 : }
6784 0 : if (!found) {
6785 0 : struct nvme_path_id path = {};
6786 :
6787 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
6788 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
6789 :
6790 0 : path.trid = entry_ctx->trid;
6791 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
6792 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
6793 0 : free(entry_ctx);
6794 : }
6795 : }
6796 0 : free(log_page);
6797 0 : ctx->log_page = NULL;
6798 0 : discovery_complete(ctx);
6799 0 : }
6800 :
6801 : static void
6802 0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
6803 : {
6804 0 : ctx->timeout_ticks = 0;
6805 0 : ctx->rc = status;
6806 0 : if (ctx->start_cb_fn) {
6807 0 : ctx->start_cb_fn(ctx->cb_ctx, status);
6808 0 : ctx->start_cb_fn = NULL;
6809 0 : ctx->cb_ctx = NULL;
6810 : }
6811 0 : }
6812 :
6813 : static void
6814 0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
6815 : {
6816 0 : struct discovery_entry_ctx *entry_ctx = cb_ctx;
6817 0 : struct discovery_ctx *ctx = entry_ctx->ctx;
6818 :
6819 0 : DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
6820 0 : ctx->attach_in_progress--;
6821 0 : if (ctx->attach_in_progress == 0) {
6822 0 : complete_discovery_start(ctx, ctx->rc);
6823 0 : if (ctx->initializing && ctx->rc != 0) {
6824 0 : DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
6825 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
6826 : } else {
6827 0 : discovery_remove_controllers(ctx);
6828 : }
6829 : }
6830 0 : }
6831 :
6832 : static struct discovery_entry_ctx *
6833 0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
6834 : {
6835 : struct discovery_entry_ctx *new_ctx;
6836 :
6837 0 : new_ctx = calloc(1, sizeof(*new_ctx));
6838 0 : if (new_ctx == NULL) {
6839 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6840 0 : return NULL;
6841 : }
6842 :
6843 0 : new_ctx->ctx = ctx;
6844 0 : memcpy(&new_ctx->trid, trid, sizeof(*trid));
6845 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
6846 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
6847 0 : return new_ctx;
6848 : }
6849 :
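     : /* Handle a fetched discovery log page: discovery-subtype entries are kept as
     :  * referral contexts, and NVM subsystem entries that are not attached yet
     :  * trigger bdev_nvme_create() with either a generated name or, when the subnqn
     :  * is already known to a discovery service, the existing controller name.
     :  * Entries that disappeared from the log page are removed once all in-progress
     :  * attaches complete.
     :  */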
6850 : static void
6851 0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
6852 : struct spdk_nvmf_discovery_log_page *log_page)
6853 : {
6854 0 : struct discovery_ctx *ctx = cb_arg;
6855 : struct discovery_entry_ctx *entry_ctx, *tmp;
6856 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
6857 : uint64_t numrec, i;
6858 : bool found;
6859 :
6860 0 : if (rc || spdk_nvme_cpl_is_error(cpl)) {
6861 0 : DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
6862 0 : return;
6863 : }
6864 :
6865 0 : ctx->log_page = log_page;
6866 0 : assert(ctx->attach_in_progress == 0);
6867 0 : numrec = from_le64(&log_page->numrec);
6868 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
6869 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
6870 0 : free(entry_ctx);
6871 : }
6872 0 : for (i = 0; i < numrec; i++) {
6873 0 : found = false;
6874 0 : new_entry = &log_page->entries[i];
6875 0 : if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
6876 0 : new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
6877 : struct discovery_entry_ctx *new_ctx;
6878 0 : struct spdk_nvme_transport_id trid = {};
6879 :
6880 0 : build_trid_from_log_page_entry(&trid, new_entry);
6881 0 : new_ctx = create_discovery_entry_ctx(ctx, &trid);
6882 0 : if (new_ctx == NULL) {
6883 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6884 0 : break;
6885 : }
6886 :
6887 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
6888 0 : continue;
6889 : }
6890 0 : TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
6891 0 : old_entry = &entry_ctx->entry;
6892 0 : if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
6893 0 : found = true;
6894 0 : break;
6895 : }
6896 : }
6897 0 : if (!found) {
6898 0 : struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
6899 : struct discovery_ctx *d_ctx;
6900 :
6901 0 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
6902 0 : TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
6903 0 : if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
6904 : sizeof(new_entry->subnqn))) {
6905 0 : break;
6906 : }
6907 : }
6908 0 : if (subnqn_ctx) {
6909 0 : break;
6910 : }
6911 : }
6912 :
6913 0 : new_ctx = calloc(1, sizeof(*new_ctx));
6914 0 : if (new_ctx == NULL) {
6915 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6916 0 : break;
6917 : }
6918 :
6919 0 : new_ctx->ctx = ctx;
6920 0 : memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
6921 0 : build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
6922 0 : if (subnqn_ctx) {
6923 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
6924 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
6925 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
6926 : new_ctx->name);
6927 : } else {
6928 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
6929 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
6930 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
6931 : new_ctx->name);
6932 : }
6933 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
6934 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
6935 0 : rc = bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
6936 : discovery_attach_controller_done, new_ctx,
6937 : &new_ctx->drv_opts, &ctx->bdev_opts, true);
6938 0 : if (rc == 0) {
6939 0 : TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
6940 0 : ctx->attach_in_progress++;
6941 : } else {
6942 0 : DISCOVERY_ERRLOG(ctx, "bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
6943 : }
6944 : }
6945 : }
6946 :
6947 0 : if (ctx->attach_in_progress == 0) {
6948 0 : discovery_remove_controllers(ctx);
6949 : }
6950 : }
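/*
 * Editor's note: summary added for clarity; behavior inferred from the code
 * above. discovery_log_page_cb() reconciles the freshly fetched discovery log
 * page with the current state: existing referral (discovery subtype) entries
 * are dropped and rebuilt from the new page; each NVM subsystem entry is
 * compared byte-for-byte against already known entries and, if unknown, is
 * either attached as an additional path when its subnqn matches an entry in
 * some existing discovery context, or attached as a new subsystem with an
 * auto-generated name. Entries that disappeared from the log page are removed
 * afterwards in discovery_remove_controllers().
 */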
6951 :
6952 : static void
6953 0 : get_discovery_log_page(struct discovery_ctx *ctx)
6954 : {
6955 : int rc;
6956 :
6957 0 : assert(ctx->in_progress == false);
6958 0 : ctx->in_progress = true;
6959 0 : rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
6960 0 : if (rc != 0) {
6961 0 : DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
6962 : }
6963 0 : DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
6964 0 : }
6965 :
6966 : static void
6967 0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
6968 : {
6969 0 : struct discovery_ctx *ctx = arg;
6970 0 : uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
6971 :
6972 0 : if (spdk_nvme_cpl_is_error(cpl)) {
6973 0 : DISCOVERY_ERRLOG(ctx, "aer failed\n");
6974 0 : return;
6975 : }
6976 :
6977 0 : if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
6978 0 : DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
6979 0 : return;
6980 : }
6981 :
6982 0 : DISCOVERY_INFOLOG(ctx, "got aer\n");
6983 0 : if (ctx->in_progress) {
6984 0 : ctx->pending = true;
6985 0 : return;
6986 : }
6987 :
6988 0 : get_discovery_log_page(ctx);
6989 : }
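/*
 * Editor's note: illustrative sketch only, not part of this module. The AER
 * completion packs the associated log page identifier into bits 23:16 of
 * cdw0, which is what discovery_aer_cb() extracts above. A minimal standalone
 * demonstration of that bit manipulation (plain C, no SPDK dependencies; the
 * value 0x70 for the discovery log page is assumed here for illustration):
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
aer_log_page_id(uint32_t cdw0)
{
	/* The log page identifier lives in byte 2 of the completion dword. */
	return (cdw0 & 0xFF0000) >> 16;
}

int
main(void)
{
	uint32_t cdw0 = 0x00700002;	/* hypothetical AER completion dword */

	assert(aer_log_page_id(cdw0) == 0x70);
	printf("log page id: 0x%x\n", (unsigned int)aer_log_page_id(cdw0));
	return 0;
}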
6990 :
6991 : static void
6992 0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6993 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
6994 : {
6995 0 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6996 : struct discovery_ctx *ctx;
6997 :
6998 0 : ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
6999 :
7000 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
7001 0 : ctx->probe_ctx = NULL;
7002 0 : ctx->ctrlr = ctrlr;
7003 :
7004 0 : if (ctx->rc != 0) {
7005 0 : DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
7006 : ctx->rc);
7007 0 : return;
7008 : }
7009 :
7010 0 : spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
7011 : }
7012 :
7013 : static int
7014 0 : discovery_poller(void *arg)
7015 : {
7016 0 : struct discovery_ctx *ctx = arg;
7017 : struct spdk_nvme_transport_id *trid;
7018 : int rc;
7019 :
7020 0 : if (ctx->detach_ctx) {
7021 0 : rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
7022 0 : if (rc != -EAGAIN) {
7023 0 : ctx->detach_ctx = NULL;
7024 0 : ctx->ctrlr = NULL;
7025 : }
7026 0 : } else if (ctx->stop) {
7027 0 : if (ctx->ctrlr != NULL) {
7028 0 : rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
7029 0 : if (rc == 0) {
7030 0 : return SPDK_POLLER_BUSY;
7031 : }
7032 0 : DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
7033 : }
7034 0 : spdk_poller_unregister(&ctx->poller);
7035 0 : TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
7036 0 : assert(ctx->start_cb_fn == NULL);
7037 0 : if (ctx->stop_cb_fn != NULL) {
7038 0 : ctx->stop_cb_fn(ctx->cb_ctx);
7039 : }
7040 0 : free_discovery_ctx(ctx);
7041 0 : } else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
7042 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
7043 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
7044 0 : assert(ctx->initializing);
7045 0 : spdk_poller_unregister(&ctx->poller);
7046 0 : TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
7047 0 : complete_discovery_start(ctx, -ETIMEDOUT);
7048 0 : stop_discovery(ctx, NULL, NULL);
7049 0 : free_discovery_ctx(ctx);
7050 0 : return SPDK_POLLER_BUSY;
7051 : }
7052 :
7053 0 : assert(ctx->entry_ctx_in_use == NULL);
7054 0 : ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
7055 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
7056 0 : trid = &ctx->entry_ctx_in_use->trid;
7057 0 : ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
7058 0 : if (ctx->probe_ctx) {
7059 0 : spdk_poller_unregister(&ctx->poller);
7060 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
7061 : } else {
7062 0 : DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
7063 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
7064 0 : ctx->entry_ctx_in_use = NULL;
7065 : }
7066 0 : } else if (ctx->probe_ctx) {
7067 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
7068 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
7069 0 : complete_discovery_start(ctx, -ETIMEDOUT);
7070 0 : return SPDK_POLLER_BUSY;
7071 : }
7072 :
7073 0 : rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
7074 0 : if (rc != -EAGAIN) {
7075 0 : if (ctx->rc != 0) {
7076 0 : assert(ctx->initializing);
7077 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
7078 : } else {
7079 0 : assert(rc == 0);
7080 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
7081 0 : ctx->rc = rc;
7082 0 : get_discovery_log_page(ctx);
7083 : }
7084 : }
7085 : } else {
7086 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
7087 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
7088 0 : complete_discovery_start(ctx, -ETIMEDOUT);
7089 : /* We need to wait until all NVM ctrlrs are attached before we stop the
7090 : * discovery service to make sure we don't detach a ctrlr that is still
7091 : * being attached.
7092 : */
7093 0 : if (ctx->attach_in_progress == 0) {
7094 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
7095 0 : return SPDK_POLLER_BUSY;
7096 : }
7097 : }
7098 :
7099 0 : rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
7100 0 : if (rc < 0) {
7101 0 : spdk_poller_unregister(&ctx->poller);
7102 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
7103 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
7104 0 : ctx->entry_ctx_in_use = NULL;
7105 :
7106 0 : rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
7107 0 : if (rc != 0) {
7108 0 : DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
7109 0 : ctx->ctrlr = NULL;
7110 : }
7111 : }
7112 : }
7113 :
7114 0 : return SPDK_POLLER_BUSY;
7115 : }
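/*
 * Editor's note: summary added for clarity; behavior inferred from the code
 * above. discovery_poller() is a small state machine driven by which fields of
 * the context are set: an in-flight detach is polled to completion first; a
 * stop request detaches the discovery ctrlr and tears the context down; with
 * no ctrlr and no probe in flight, the next referral entry is picked and an
 * async connect is started; an in-flight probe is polled until the discovery
 * ctrlr attaches and the log page is fetched; otherwise the attached ctrlr's
 * admin queue is polled, falling back to a detach/reconnect cycle on error.
 * The attach timeout is checked in each of the connecting states.
 */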
7116 :
7117 : static void
7118 0 : start_discovery_poller(void *arg)
7119 : {
7120 0 : struct discovery_ctx *ctx = arg;
7121 :
7122 0 : TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
7123 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
7124 0 : }
7125 :
7126 : int
7127 0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
7128 : const char *base_name,
7129 : struct spdk_nvme_ctrlr_opts *drv_opts,
7130 : struct nvme_ctrlr_opts *bdev_opts,
7131 : uint64_t attach_timeout,
7132 : bool from_mdns,
7133 : spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
7134 : {
7135 : struct discovery_ctx *ctx;
7136 : struct discovery_entry_ctx *discovery_entry_ctx;
7137 :
7138 0 : snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
7139 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7140 0 : if (strcmp(ctx->name, base_name) == 0) {
7141 0 : return -EEXIST;
7142 : }
7143 :
7144 0 : if (ctx->entry_ctx_in_use != NULL) {
7145 0 : if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
7146 0 : return -EEXIST;
7147 : }
7148 : }
7149 :
7150 0 : TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
7151 0 : if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
7152 0 : return -EEXIST;
7153 : }
7154 : }
7155 : }
7156 :
7157 0 : ctx = calloc(1, sizeof(*ctx));
7158 0 : if (ctx == NULL) {
7159 0 : return -ENOMEM;
7160 : }
7161 :
7162 0 : ctx->name = strdup(base_name);
7163 0 : if (ctx->name == NULL) {
7164 0 : free_discovery_ctx(ctx);
7165 0 : return -ENOMEM;
7166 : }
7167 0 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
7168 0 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
7169 0 : ctx->from_mdns_discovery_service = from_mdns;
7170 0 : ctx->bdev_opts.from_discovery_service = true;
7171 0 : ctx->calling_thread = spdk_get_thread();
7172 0 : ctx->start_cb_fn = cb_fn;
7173 0 : ctx->cb_ctx = cb_ctx;
7174 0 : ctx->initializing = true;
7175 0 : if (ctx->start_cb_fn) {
7176 : /* Remember that a start callback was supplied; the JSON config dump uses
7177 : * this to denote whether the wait_for_attach RPC parameter was specified.
7178 : */
7179 0 : ctx->wait_for_attach = true;
7180 : }
7181 0 : if (attach_timeout != 0) {
7182 0 : ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
7183 0 : spdk_get_ticks_hz() / 1000ull;
7184 : }
7185 0 : TAILQ_INIT(&ctx->nvm_entry_ctxs);
7186 0 : TAILQ_INIT(&ctx->discovery_entry_ctxs);
7187 0 : memcpy(&ctx->trid, trid, sizeof(*trid));
7188 : /* Even if the user did not specify a hostnqn, drv_opts.hostnqn is a valid (possibly empty) string, so strdup() is safe. */
7189 0 : ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
7190 0 : if (ctx->hostnqn == NULL) {
7191 0 : free_discovery_ctx(ctx);
7192 0 : return -ENOMEM;
7193 : }
7194 0 : discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
7195 0 : if (discovery_entry_ctx == NULL) {
7196 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7197 0 : free_discovery_ctx(ctx);
7198 0 : return -ENOMEM;
7199 : }
7200 :
7201 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
7202 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
7203 0 : return 0;
7204 : }
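/*
 * Editor's note: illustrative sketch only, not part of this module. The attach
 * timeout above is converted from milliseconds into an absolute tick deadline:
 * deadline = now + timeout_ms * ticks_per_second / 1000. Multiplying before
 * dividing avoids truncating sub-second timeouts to zero. A standalone version
 * of the same arithmetic with a hypothetical 2.4 GHz tick rate:
 */
#include <assert.h>
#include <stdint.h>

static uint64_t
ms_to_deadline(uint64_t now_ticks, uint64_t timeout_ms, uint64_t ticks_hz)
{
	return now_ticks + timeout_ms * ticks_hz / 1000ull;
}

int
main(void)
{
	uint64_t hz = 2400000000ull;	/* hypothetical tick rate */

	/* A 500 ms timeout becomes 1.2 billion ticks past "now". */
	assert(ms_to_deadline(1000, 500, hz) == 1000 + 1200000000ull);
	return 0;
}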
7205 :
7206 : int
7207 0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
7208 : {
7209 : struct discovery_ctx *ctx;
7210 :
7211 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7212 0 : if (strcmp(name, ctx->name) == 0) {
7213 0 : if (ctx->stop) {
7214 0 : return -EALREADY;
7215 : }
7216 : /* If we're still starting the discovery service and ->rc is non-zero, we're
7217 : * going to stop it as soon as we can
7218 : */
7219 0 : if (ctx->initializing && ctx->rc != 0) {
7220 0 : return -EALREADY;
7221 : }
7222 0 : stop_discovery(ctx, cb_fn, cb_ctx);
7223 0 : return 0;
7224 : }
7225 : }
7226 :
7227 0 : return -ENOENT;
7228 : }
7229 :
7230 : static int
7231 1 : bdev_nvme_library_init(void)
7232 : {
7233 1 : g_bdev_nvme_init_thread = spdk_get_thread();
7234 :
7235 1 : spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
7236 : bdev_nvme_destroy_poll_group_cb,
7237 : sizeof(struct nvme_poll_group), "nvme_poll_groups");
7238 :
7239 1 : return 0;
7240 : }
7241 :
7242 : static void
7243 1 : bdev_nvme_fini_destruct_ctrlrs(void)
7244 : {
7245 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
7246 : struct nvme_ctrlr *nvme_ctrlr;
7247 :
7248 1 : pthread_mutex_lock(&g_bdev_nvme_mutex);
7249 1 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
7250 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
7251 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
7252 0 : if (nvme_ctrlr->destruct) {
7253 : /* This controller's destruction was already started
7254 : * before the application started shutting down
7255 : */
7256 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7257 0 : continue;
7258 : }
7259 0 : nvme_ctrlr->destruct = true;
7260 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7261 :
7262 0 : spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
7263 : nvme_ctrlr);
7264 : }
7265 : }
7266 :
7267 1 : g_bdev_nvme_module_finish = true;
7268 1 : if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
7269 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7270 1 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
7271 1 : spdk_bdev_module_fini_done();
7272 1 : return;
7273 : }
7274 :
7275 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7276 : }
7277 :
7278 : static void
7279 0 : check_discovery_fini(void *arg)
7280 : {
7281 0 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7282 0 : bdev_nvme_fini_destruct_ctrlrs();
7283 : }
7284 0 : }
7285 :
7286 : static void
7287 1 : bdev_nvme_library_fini(void)
7288 : {
7289 : struct nvme_probe_skip_entry *entry, *entry_tmp;
7290 : struct discovery_ctx *ctx;
7291 :
7292 1 : spdk_poller_unregister(&g_hotplug_poller);
7293 1 : free(g_hotplug_probe_ctx);
7294 1 : g_hotplug_probe_ctx = NULL;
7295 :
7296 1 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
7297 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
7298 0 : free(entry);
7299 : }
7300 :
7301 1 : assert(spdk_get_thread() == g_bdev_nvme_init_thread);
7302 1 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7303 1 : bdev_nvme_fini_destruct_ctrlrs();
7304 : } else {
7305 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7306 0 : stop_discovery(ctx, check_discovery_fini, NULL);
7307 : }
7308 : }
7309 1 : }
7310 :
7311 : static void
7312 0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
7313 : {
7314 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7315 0 : struct spdk_bdev *bdev = bdev_io->bdev;
7316 0 : struct spdk_dif_ctx dif_ctx;
7317 0 : struct spdk_dif_error err_blk = {};
7318 : int rc;
7319 0 : struct spdk_dif_ctx_init_ext_opts dif_opts;
7320 :
7321 0 : dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
7322 0 : dif_opts.dif_pi_format = bdev->dif_pi_format;
7323 0 : rc = spdk_dif_ctx_init(&dif_ctx,
7324 0 : bdev->blocklen, bdev->md_len, bdev->md_interleave,
7325 0 : bdev->dif_is_head_of_md, bdev->dif_type,
7326 : bdev_io->u.bdev.dif_check_flags,
7327 0 : bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
7328 0 : if (rc != 0) {
7329 0 : SPDK_ERRLOG("Initialization of DIF context failed\n");
7330 0 : return;
7331 : }
7332 :
7333 0 : if (bdev->md_interleave) {
7334 0 : rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7335 0 : bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7336 : } else {
7337 0 : struct iovec md_iov = {
7338 0 : .iov_base = bdev_io->u.bdev.md_buf,
7339 0 : .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
7340 : };
7341 :
7342 0 : rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7343 0 : &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7344 : }
7345 :
7346 0 : if (rc != 0) {
7347 0 : SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
7348 : err_blk.err_type, err_blk.err_offset);
7349 : } else {
7350 0 : SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
7351 : }
7352 : }
7353 :
7354 : static void
7355 0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7356 : {
7357 0 : struct nvme_bdev_io *bio = ref;
7358 :
7359 0 : if (spdk_nvme_cpl_is_success(cpl)) {
7360 : /* Run PI verification for read data buffer. */
7361 0 : bdev_nvme_verify_pi_error(bio);
7362 : }
7363 :
7364 : /* Return original completion status */
7365 0 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7366 0 : }
7367 :
7368 : static void
7369 3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7370 : {
7371 3 : struct nvme_bdev_io *bio = ref;
7372 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7373 : int ret;
7374 :
7375 3 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7376 0 : SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
7377 : cpl->status.sct, cpl->status.sc);
7378 :
7379 : /* Save completion status to use after verifying PI error. */
7380 0 : bio->cpl = *cpl;
7381 :
7382 0 : if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
7383 : /* Read without PI checking to verify PI error. */
7384 0 : ret = bdev_nvme_no_pi_readv(bio,
7385 : bdev_io->u.bdev.iovs,
7386 : bdev_io->u.bdev.iovcnt,
7387 : bdev_io->u.bdev.md_buf,
7388 : bdev_io->u.bdev.num_blocks,
7389 : bdev_io->u.bdev.offset_blocks);
7390 0 : if (ret == 0) {
7391 0 : return;
7392 : }
7393 : }
7394 : }
7395 :
7396 3 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7397 : }
7398 :
7399 : static void
7400 25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7401 : {
7402 25 : struct nvme_bdev_io *bio = ref;
7403 :
7404 25 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7405 0 : SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
7406 : cpl->status.sct, cpl->status.sc);
7407 : /* Run PI verification for write data buffer if PI error is detected. */
7408 0 : bdev_nvme_verify_pi_error(bio);
7409 : }
7410 :
7411 25 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7412 25 : }
7413 :
7414 : static void
7415 0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7416 : {
7417 0 : struct nvme_bdev_io *bio = ref;
7418 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7419 :
7420 : /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
7421 : * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
7422 : */
7423 0 : bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
7424 :
7425 0 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7426 0 : SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
7427 : cpl->status.sct, cpl->status.sc);
7428 : /* Run PI verification for zone append data buffer if PI error is detected. */
7429 0 : bdev_nvme_verify_pi_error(bio);
7430 : }
7431 :
7432 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7433 0 : }
7434 :
7435 : static void
7436 1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7437 : {
7438 1 : struct nvme_bdev_io *bio = ref;
7439 :
7440 1 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7441 0 : SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
7442 : cpl->status.sct, cpl->status.sc);
7443 : /* Run PI verification for compare data buffer if PI error is detected. */
7444 0 : bdev_nvme_verify_pi_error(bio);
7445 : }
7446 :
7447 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7448 1 : }
7449 :
7450 : static void
7451 4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7452 : {
7453 4 : struct nvme_bdev_io *bio = ref;
7454 :
7455 : /* Compare operation completion */
7456 4 : if (!bio->first_fused_completed) {
7457 : /* Save compare result for write callback */
7458 2 : bio->cpl = *cpl;
7459 2 : bio->first_fused_completed = true;
7460 2 : return;
7461 : }
7462 :
7463 : /* Write operation completion */
7464 2 : if (spdk_nvme_cpl_is_error(&bio->cpl)) {
7465 : /* If bio->cpl is already an error, it means the compare operation failed. In that case,
7466 : * complete the IO with the compare operation's status.
7467 : */
7468 1 : if (!spdk_nvme_cpl_is_error(cpl)) {
7469 1 : SPDK_ERRLOG("Unexpected write success after compare failure.\n");
7470 : }
7471 :
7472 1 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7473 : } else {
7474 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7475 : }
7476 : }
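/*
 * Editor's note: summary added for clarity. A fused compare-and-write produces
 * two completions for the same nvme_bdev_io. The first (the compare) is only
 * recorded in bio->cpl; the second (the write) decides the final status: if
 * the recorded compare status is an error, the I/O is completed with that
 * status, otherwise with the write's status.
 */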
7477 :
7478 : static void
7479 1 : bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
7480 : {
7481 1 : struct nvme_bdev_io *bio = ref;
7482 :
7483 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7484 1 : }
7485 :
7486 : static int
7487 0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
7488 : {
7489 0 : switch (desc->zt) {
7490 0 : case SPDK_NVME_ZONE_TYPE_SEQWR:
7491 0 : info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
7492 0 : break;
7493 0 : default:
7494 0 : SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
7495 0 : return -EIO;
7496 : }
7497 :
7498 0 : switch (desc->zs) {
7499 0 : case SPDK_NVME_ZONE_STATE_EMPTY:
7500 0 : info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
7501 0 : break;
7502 0 : case SPDK_NVME_ZONE_STATE_IOPEN:
7503 0 : info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
7504 0 : break;
7505 0 : case SPDK_NVME_ZONE_STATE_EOPEN:
7506 0 : info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
7507 0 : break;
7508 0 : case SPDK_NVME_ZONE_STATE_CLOSED:
7509 0 : info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
7510 0 : break;
7511 0 : case SPDK_NVME_ZONE_STATE_RONLY:
7512 0 : info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
7513 0 : break;
7514 0 : case SPDK_NVME_ZONE_STATE_FULL:
7515 0 : info->state = SPDK_BDEV_ZONE_STATE_FULL;
7516 0 : break;
7517 0 : case SPDK_NVME_ZONE_STATE_OFFLINE:
7518 0 : info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
7519 0 : break;
7520 0 : default:
7521 0 : SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
7522 0 : return -EIO;
7523 : }
7524 :
7525 0 : info->zone_id = desc->zslba;
7526 0 : info->write_pointer = desc->wp;
7527 0 : info->capacity = desc->zcap;
7528 :
7529 0 : return 0;
7530 : }
7531 :
7532 : static void
7533 0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
7534 : {
7535 0 : struct nvme_bdev_io *bio = ref;
7536 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7537 0 : uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
7538 0 : uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
7539 0 : struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
7540 : uint64_t max_zones_per_buf, i;
7541 : uint32_t zone_report_bufsize;
7542 : struct spdk_nvme_ns *ns;
7543 : struct spdk_nvme_qpair *qpair;
7544 : int ret;
7545 :
7546 0 : if (spdk_nvme_cpl_is_error(cpl)) {
7547 0 : goto out_complete_io_nvme_cpl;
7548 : }
7549 :
7550 0 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
7551 0 : ret = -ENXIO;
7552 0 : goto out_complete_io_ret;
7553 : }
7554 :
7555 0 : ns = bio->io_path->nvme_ns->ns;
7556 0 : qpair = bio->io_path->qpair->qpair;
7557 :
7558 0 : zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
7559 0 : max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
7560 : sizeof(bio->zone_report_buf->descs[0]);
7561 :
7562 0 : if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
7563 0 : ret = -EINVAL;
7564 0 : goto out_complete_io_ret;
7565 : }
7566 :
7567 0 : if (!bio->zone_report_buf->nr_zones) {
7568 0 : ret = -EINVAL;
7569 0 : goto out_complete_io_ret;
7570 : }
7571 :
7572 0 : for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
7573 0 : ret = fill_zone_from_report(&info[bio->handled_zones],
7574 0 : &bio->zone_report_buf->descs[i]);
7575 0 : if (ret) {
7576 0 : goto out_complete_io_ret;
7577 : }
7578 0 : bio->handled_zones++;
7579 : }
7580 :
7581 0 : if (bio->handled_zones < zones_to_copy) {
7582 0 : uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
7583 0 : uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
7584 :
7585 0 : memset(bio->zone_report_buf, 0, zone_report_bufsize);
7586 0 : ret = spdk_nvme_zns_report_zones(ns, qpair,
7587 0 : bio->zone_report_buf, zone_report_bufsize,
7588 : slba, SPDK_NVME_ZRA_LIST_ALL, true,
7589 : bdev_nvme_get_zone_info_done, bio);
7590 0 : if (!ret) {
7591 0 : return;
7592 : } else {
7593 0 : goto out_complete_io_ret;
7594 : }
7595 : }
7596 :
7597 0 : out_complete_io_nvme_cpl:
7598 0 : free(bio->zone_report_buf);
7599 0 : bio->zone_report_buf = NULL;
7600 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7601 0 : return;
7602 :
7603 0 : out_complete_io_ret:
7604 0 : free(bio->zone_report_buf);
7605 0 : bio->zone_report_buf = NULL;
7606 0 : bdev_nvme_io_complete(bio, ret);
7607 : }
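/*
 * Editor's note: illustrative sketch only, not part of this module. A zone
 * report buffer starts with a fixed header followed by an array of zone
 * descriptors, so the number of descriptors that fit in one transfer is
 * (buffer size - header size) / descriptor size, which is the check performed
 * above. The same arithmetic with purely illustrative sizes (the real sizes
 * come from the spdk_nvme_zns definitions):
 */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint32_t bufsize = 4096;	/* hypothetical max transfer size */
	uint32_t hdr = 64;		/* hypothetical report header size */
	uint32_t desc = 64;		/* hypothetical zone descriptor size */
	uint64_t max_zones = (bufsize - hdr) / desc;

	assert(max_zones == 63);
	return 0;
}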
7608 :
7609 : static void
7610 0 : bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
7611 : {
7612 0 : struct nvme_bdev_io *bio = ref;
7613 :
7614 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7615 0 : }
7616 :
7617 : static void
7618 4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
7619 : {
7620 4 : struct nvme_bdev_io *bio = ctx;
7621 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7622 4 : const struct spdk_nvme_cpl *cpl = &bio->cpl;
7623 :
7624 4 : assert(bdev_nvme_io_type_is_admin(bdev_io->type));
7625 :
7626 4 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
7627 4 : }
7628 :
7629 : static void
7630 3 : bdev_nvme_abort_complete(void *ctx)
7631 : {
7632 3 : struct nvme_bdev_io *bio = ctx;
7633 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7634 :
7635 3 : if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
7636 3 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
7637 : } else {
7638 0 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
7639 : }
7640 3 : }
7641 :
7642 : static void
7643 3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
7644 : {
7645 3 : struct nvme_bdev_io *bio = ref;
7646 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7647 :
7648 3 : bio->cpl = *cpl;
7649 3 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
7650 3 : }
7651 :
7652 : static void
7653 4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
7654 : {
7655 4 : struct nvme_bdev_io *bio = ref;
7656 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7657 :
7658 4 : bio->cpl = *cpl;
7659 4 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
7660 : bdev_nvme_admin_passthru_complete_nvme_status, bio);
7661 4 : }
7662 :
7663 : static void
7664 0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
7665 : {
7666 0 : struct nvme_bdev_io *bio = ref;
7667 : struct iovec *iov;
7668 :
7669 0 : bio->iov_offset = sgl_offset;
7670 0 : for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
7671 0 : iov = &bio->iovs[bio->iovpos];
7672 0 : if (bio->iov_offset < iov->iov_len) {
7673 0 : break;
7674 : }
7675 :
7676 0 : bio->iov_offset -= iov->iov_len;
7677 : }
7678 0 : }
7679 :
7680 : static int
7681 0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
7682 : {
7683 0 : struct nvme_bdev_io *bio = ref;
7684 : struct iovec *iov;
7685 :
7686 0 : assert(bio->iovpos < bio->iovcnt);
7687 :
7688 0 : iov = &bio->iovs[bio->iovpos];
7689 :
7690 0 : *address = iov->iov_base;
7691 0 : *length = iov->iov_len;
7692 :
7693 0 : if (bio->iov_offset) {
7694 0 : assert(bio->iov_offset <= iov->iov_len);
7695 0 : *address += bio->iov_offset;
7696 0 : *length -= bio->iov_offset;
7697 : }
7698 :
7699 0 : bio->iov_offset += *length;
7700 0 : if (bio->iov_offset == iov->iov_len) {
7701 0 : bio->iovpos++;
7702 0 : bio->iov_offset = 0;
7703 : }
7704 :
7705 0 : return 0;
7706 : }
7707 :
7708 : static void
7709 0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
7710 : {
7711 0 : struct nvme_bdev_io *bio = ref;
7712 : struct iovec *iov;
7713 :
7714 0 : bio->fused_iov_offset = sgl_offset;
7715 0 : for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
7716 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
7717 0 : if (bio->fused_iov_offset < iov->iov_len) {
7718 0 : break;
7719 : }
7720 :
7721 0 : bio->fused_iov_offset -= iov->iov_len;
7722 : }
7723 0 : }
7724 :
7725 : static int
7726 0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
7727 : {
7728 0 : struct nvme_bdev_io *bio = ref;
7729 : struct iovec *iov;
7730 :
7731 0 : assert(bio->fused_iovpos < bio->fused_iovcnt);
7732 :
7733 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
7734 :
7735 0 : *address = iov->iov_base;
7736 0 : *length = iov->iov_len;
7737 :
7738 0 : if (bio->fused_iov_offset) {
7739 0 : assert(bio->fused_iov_offset <= iov->iov_len);
7740 0 : *address += bio->fused_iov_offset;
7741 0 : *length -= bio->fused_iov_offset;
7742 : }
7743 :
7744 0 : bio->fused_iov_offset += *length;
7745 0 : if (bio->fused_iov_offset == iov->iov_len) {
7746 0 : bio->fused_iovpos++;
7747 0 : bio->fused_iov_offset = 0;
7748 : }
7749 :
7750 0 : return 0;
7751 : }
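/*
 * Editor's note: illustrative sketch only, not part of this module. The two
 * callback pairs above implement the SGL iteration contract used by the NVMe
 * driver: reset_sgl() positions a cursor at an absolute byte offset within the
 * iovec array, and next_sge() then hands back one contiguous segment per call.
 * A minimal standalone model of a consumer driving such a cursor (plain C,
 * hypothetical names, no SPDK dependencies):
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/uio.h>

struct sgl_cursor {
	struct iovec *iovs;
	int iovcnt;
	int iovpos;
	uint32_t iov_offset;
};

/* Position the cursor at an absolute byte offset, like reset_sgl(). */
static void
cursor_reset(struct sgl_cursor *c, uint32_t offset)
{
	c->iov_offset = offset;
	for (c->iovpos = 0; c->iovpos < c->iovcnt; c->iovpos++) {
		if (c->iov_offset < c->iovs[c->iovpos].iov_len) {
			break;
		}
		c->iov_offset -= c->iovs[c->iovpos].iov_len;
	}
}

/* Return the next contiguous segment, like next_sge() (simplified: consumes
 * the remainder of the current iovec in one step). */
static size_t
cursor_next(struct sgl_cursor *c, void **addr)
{
	struct iovec *iov = &c->iovs[c->iovpos];
	size_t len = iov->iov_len - c->iov_offset;

	*addr = (uint8_t *)iov->iov_base + c->iov_offset;
	c->iov_offset = 0;
	c->iovpos++;
	return len;
}

int
main(void)
{
	uint8_t a[16], b[32];
	struct iovec iovs[2] = {{a, sizeof(a)}, {b, sizeof(b)}};
	struct sgl_cursor c = {iovs, 2, 0, 0};
	void *addr;

	/* Start 20 bytes into the payload: lands 4 bytes into the second iovec. */
	cursor_reset(&c, 20);
	assert(cursor_next(&c, &addr) == 28 && addr == b + 4);
	return 0;
}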
7752 :
7753 : static int
7754 0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7755 : void *md, uint64_t lba_count, uint64_t lba)
7756 : {
7757 : int rc;
7758 :
7759 0 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
7760 : lba_count, lba);
7761 :
7762 0 : bio->iovs = iov;
7763 0 : bio->iovcnt = iovcnt;
7764 0 : bio->iovpos = 0;
7765 0 : bio->iov_offset = 0;
7766 :
7767 0 : rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
7768 0 : bio->io_path->qpair->qpair,
7769 : lba, lba_count,
7770 : bdev_nvme_no_pi_readv_done, bio, 0,
7771 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7772 : md, 0, 0);
7773 :
7774 0 : if (rc != 0 && rc != -ENOMEM) {
7775 0 : SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
7776 : }
7777 0 : return rc;
7778 : }
7779 :
7780 : static int
7781 3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7782 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
7783 : struct spdk_memory_domain *domain, void *domain_ctx,
7784 : struct spdk_accel_sequence *seq)
7785 : {
7786 3 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7787 3 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7788 : int rc;
7789 :
7790 3 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7791 : lba_count, lba);
7792 :
7793 3 : bio->iovs = iov;
7794 3 : bio->iovcnt = iovcnt;
7795 3 : bio->iovpos = 0;
7796 3 : bio->iov_offset = 0;
7797 :
7798 3 : if (domain != NULL || seq != NULL) {
7799 1 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
7800 1 : bio->ext_opts.memory_domain = domain;
7801 1 : bio->ext_opts.memory_domain_ctx = domain_ctx;
7802 1 : bio->ext_opts.io_flags = flags;
7803 1 : bio->ext_opts.metadata = md;
7804 1 : bio->ext_opts.accel_sequence = seq;
7805 :
7806 1 : if (iovcnt == 1) {
7807 1 : rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
7808 : bio, &bio->ext_opts);
7809 : } else {
7810 0 : rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
7811 : bdev_nvme_readv_done, bio,
7812 : bdev_nvme_queued_reset_sgl,
7813 : bdev_nvme_queued_next_sge,
7814 : &bio->ext_opts);
7815 : }
7816 2 : } else if (iovcnt == 1) {
7817 2 : rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
7818 : md, lba, lba_count, bdev_nvme_readv_done,
7819 : bio, flags, 0, 0);
7820 : } else {
7821 0 : rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
7822 : bdev_nvme_readv_done, bio, flags,
7823 : bdev_nvme_queued_reset_sgl,
7824 : bdev_nvme_queued_next_sge, md, 0, 0);
7825 : }
7826 :
7827 3 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
7828 0 : SPDK_ERRLOG("readv failed: rc = %d\n", rc);
7829 : }
7830 3 : return rc;
7831 : }
7832 :
7833 : static int
7834 25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7835 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
7836 : struct spdk_memory_domain *domain, void *domain_ctx,
7837 : struct spdk_accel_sequence *seq,
7838 : union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13)
7839 : {
7840 25 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7841 25 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7842 : int rc;
7843 :
7844 25 : SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7845 : lba_count, lba);
7846 :
7847 25 : bio->iovs = iov;
7848 25 : bio->iovcnt = iovcnt;
7849 25 : bio->iovpos = 0;
7850 25 : bio->iov_offset = 0;
7851 :
7852 25 : if (domain != NULL || seq != NULL) {
7853 0 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
7854 0 : bio->ext_opts.memory_domain = domain;
7855 0 : bio->ext_opts.memory_domain_ctx = domain_ctx;
7856 0 : bio->ext_opts.io_flags = flags | SPDK_NVME_IO_FLAGS_DIRECTIVE(cdw12.write.dtype);
7857 0 : bio->ext_opts.cdw13 = cdw13.raw;
7858 0 : bio->ext_opts.metadata = md;
7859 0 : bio->ext_opts.accel_sequence = seq;
7860 :
7861 0 : if (iovcnt == 1) {
7862 0 : rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
7863 : bio, &bio->ext_opts);
7864 : } else {
7865 0 : rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
7866 : bdev_nvme_writev_done, bio,
7867 : bdev_nvme_queued_reset_sgl,
7868 : bdev_nvme_queued_next_sge,
7869 : &bio->ext_opts);
7870 : }
7871 25 : } else if (iovcnt == 1) {
7872 25 : rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
7873 : md, lba, lba_count, bdev_nvme_writev_done,
7874 : bio, flags, 0, 0);
7875 : } else {
7876 0 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
7877 : bdev_nvme_writev_done, bio, flags,
7878 : bdev_nvme_queued_reset_sgl,
7879 : bdev_nvme_queued_next_sge, md, 0, 0);
7880 : }
7881 :
7882 25 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
7883 0 : SPDK_ERRLOG("writev failed: rc = %d\n", rc);
7884 : }
7885 25 : return rc;
7886 : }
7887 :
7888 : static int
7889 0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7890 : void *md, uint64_t lba_count, uint64_t zslba,
7891 : uint32_t flags)
7892 : {
7893 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7894 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7895 : int rc;
7896 :
7897 0 : SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
7898 : lba_count, zslba);
7899 :
7900 0 : bio->iovs = iov;
7901 0 : bio->iovcnt = iovcnt;
7902 0 : bio->iovpos = 0;
7903 0 : bio->iov_offset = 0;
7904 :
7905 0 : if (iovcnt == 1) {
7906 0 : rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
7907 : lba_count,
7908 : bdev_nvme_zone_appendv_done, bio,
7909 : flags,
7910 : 0, 0);
7911 : } else {
7912 0 : rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
7913 : bdev_nvme_zone_appendv_done, bio, flags,
7914 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7915 : md, 0, 0);
7916 : }
7917 :
7918 0 : if (rc != 0 && rc != -ENOMEM) {
7919 0 : SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
7920 : }
7921 0 : return rc;
7922 : }
7923 :
7924 : static int
7925 1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7926 : void *md, uint64_t lba_count, uint64_t lba,
7927 : uint32_t flags)
7928 : {
7929 : int rc;
7930 :
7931 1 : SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7932 : lba_count, lba);
7933 :
7934 1 : bio->iovs = iov;
7935 1 : bio->iovcnt = iovcnt;
7936 1 : bio->iovpos = 0;
7937 1 : bio->iov_offset = 0;
7938 :
7939 1 : rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
7940 1 : bio->io_path->qpair->qpair,
7941 : lba, lba_count,
7942 : bdev_nvme_comparev_done, bio, flags,
7943 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7944 : md, 0, 0);
7945 :
7946 1 : if (rc != 0 && rc != -ENOMEM) {
7947 0 : SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
7948 : }
7949 1 : return rc;
7950 : }
7951 :
7952 : static int
7953 2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
7954 : struct iovec *write_iov, int write_iovcnt,
7955 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
7956 : {
7957 2 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7958 2 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7959 2 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7960 : int rc;
7961 :
7962 2 : SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7963 : lba_count, lba);
7964 :
7965 2 : bio->iovs = cmp_iov;
7966 2 : bio->iovcnt = cmp_iovcnt;
7967 2 : bio->iovpos = 0;
7968 2 : bio->iov_offset = 0;
7969 2 : bio->fused_iovs = write_iov;
7970 2 : bio->fused_iovcnt = write_iovcnt;
7971 2 : bio->fused_iovpos = 0;
7972 2 : bio->fused_iov_offset = 0;
7973 :
7974 2 : if (bdev_io->num_retries == 0) {
7975 2 : bio->first_fused_submitted = false;
7976 2 : bio->first_fused_completed = false;
7977 : }
7978 :
7979 2 : if (!bio->first_fused_submitted) {
7980 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
7981 2 : memset(&bio->cpl, 0, sizeof(bio->cpl));
7982 :
7983 2 : rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
7984 : bdev_nvme_comparev_and_writev_done, bio, flags,
7985 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
7986 2 : if (rc == 0) {
7987 2 : bio->first_fused_submitted = true;
7988 2 : flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
7989 : } else {
7990 0 : if (rc != -ENOMEM) {
7991 0 : SPDK_ERRLOG("compare failed: rc = %d\n", rc);
7992 : }
7993 0 : return rc;
7994 : }
7995 : }
7996 :
7997 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
7998 :
7999 2 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
8000 : bdev_nvme_comparev_and_writev_done, bio, flags,
8001 : bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
8002 2 : if (rc != 0 && rc != -ENOMEM) {
8003 0 : SPDK_ERRLOG("write failed: rc = %d\n", rc);
8004 0 : rc = 0;
8005 : }
8006 :
8007 2 : return rc;
8008 : }
8009 :
8010 : static int
8011 1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
8012 : {
8013 1 : struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
8014 : struct spdk_nvme_dsm_range *range;
8015 : uint64_t offset, remaining;
8016 : uint64_t num_ranges_u64;
8017 : uint16_t num_ranges;
8018 : int rc;
8019 :
8020 1 : num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
8021 : SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8022 1 : if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
8023 0 : SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
8024 0 : return -EINVAL;
8025 : }
8026 1 : num_ranges = (uint16_t)num_ranges_u64;
8027 :
8028 1 : offset = offset_blocks;
8029 1 : remaining = num_blocks;
8030 1 : range = &dsm_ranges[0];
8031 :
8032 : /* Fill max-size ranges until the remaining blocks fit into one range */
8033 1 : while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
8034 0 : range->attributes.raw = 0;
8035 0 : range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8036 0 : range->starting_lba = offset;
8037 :
8038 0 : offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8039 0 : remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8040 0 : range++;
8041 : }
8042 :
8043 : /* Final range describes the remaining blocks */
8044 1 : range->attributes.raw = 0;
8045 1 : range->length = remaining;
8046 1 : range->starting_lba = offset;
8047 :
8048 1 : rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
8049 1 : bio->io_path->qpair->qpair,
8050 : SPDK_NVME_DSM_ATTR_DEALLOCATE,
8051 : dsm_ranges, num_ranges,
8052 : bdev_nvme_queued_done, bio);
8053 :
8054 1 : return rc;
8055 : }
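/*
 * Editor's note: illustrative sketch only, not part of this module. The unmap
 * path above performs a ceiling division to decide how many DSM ranges are
 * needed, rejects requests that would exceed the per-command range limit, and
 * then fills max-size ranges until the tail fits in one. The same arithmetic
 * with small, purely illustrative limits (the real limits come from the NVMe
 * spec definitions in the SPDK headers):
 */
#include <assert.h>
#include <stdint.h>

#define DEMO_RANGE_MAX_BLOCKS 8u
#define DEMO_MAX_RANGES       4u

struct demo_range {
	uint64_t starting_lba;
	uint64_t length;
};

static int
demo_split_unmap(uint64_t offset, uint64_t num_blocks, struct demo_range *out)
{
	uint64_t num_ranges = (num_blocks + DEMO_RANGE_MAX_BLOCKS - 1) / DEMO_RANGE_MAX_BLOCKS;
	uint64_t i = 0;

	if (num_ranges > DEMO_MAX_RANGES) {
		return -1;	/* request too large for a single DSM command */
	}

	/* Fill max-size ranges until the remaining blocks fit into one range. */
	while (num_blocks > DEMO_RANGE_MAX_BLOCKS) {
		out[i].starting_lba = offset;
		out[i].length = DEMO_RANGE_MAX_BLOCKS;
		offset += DEMO_RANGE_MAX_BLOCKS;
		num_blocks -= DEMO_RANGE_MAX_BLOCKS;
		i++;
	}

	/* Final range covers the remainder. */
	out[i].starting_lba = offset;
	out[i].length = num_blocks;
	return (int)(i + 1);
}

int
main(void)
{
	struct demo_range r[DEMO_MAX_RANGES];

	/* 20 blocks at LBA 100 -> ranges of 8, 8 and 4 blocks. */
	assert(demo_split_unmap(100, 20, r) == 3);
	assert(r[2].starting_lba == 116 && r[2].length == 4);
	return 0;
}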
8056 :
8057 : static int
8058 0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
8059 : {
8060 0 : if (num_blocks > UINT16_MAX + 1) {
8061 0 : SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
8062 0 : return -EINVAL;
8063 : }
8064 :
8065 0 : return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
8066 0 : bio->io_path->qpair->qpair,
8067 : offset_blocks, num_blocks,
8068 : bdev_nvme_queued_done, bio,
8069 : 0);
8070 : }
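/*
 * Editor's note: added for clarity. The Write Zeroes command encodes its block
 * count in a 0's-based 16-bit NLB field, so a single command can cover at most
 * 65536 blocks; that is what the UINT16_MAX + 1 check above enforces.
 */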
8071 :
8072 : static int
8073 0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
8074 : struct spdk_bdev_zone_info *info)
8075 : {
8076 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8077 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8078 0 : uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
8079 0 : uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
8080 0 : uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
8081 :
8082 0 : if (zone_id % zone_size != 0) {
8083 0 : return -EINVAL;
8084 : }
8085 :
8086 0 : if (num_zones > total_zones || !num_zones) {
8087 0 : return -EINVAL;
8088 : }
8089 :
8090 0 : assert(!bio->zone_report_buf);
8091 0 : bio->zone_report_buf = calloc(1, zone_report_bufsize);
8092 0 : if (!bio->zone_report_buf) {
8093 0 : return -ENOMEM;
8094 : }
8095 :
8096 0 : bio->handled_zones = 0;
8097 :
8098 0 : return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
8099 : zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
8100 : bdev_nvme_get_zone_info_done, bio);
8101 : }
8102 :
8103 : static int
8104 0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
8105 : enum spdk_bdev_zone_action action)
8106 : {
8107 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8108 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8109 :
8110 0 : switch (action) {
8111 0 : case SPDK_BDEV_ZONE_CLOSE:
8112 0 : return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
8113 : bdev_nvme_zone_management_done, bio);
8114 0 : case SPDK_BDEV_ZONE_FINISH:
8115 0 : return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
8116 : bdev_nvme_zone_management_done, bio);
8117 0 : case SPDK_BDEV_ZONE_OPEN:
8118 0 : return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
8119 : bdev_nvme_zone_management_done, bio);
8120 0 : case SPDK_BDEV_ZONE_RESET:
8121 0 : return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
8122 : bdev_nvme_zone_management_done, bio);
8123 0 : case SPDK_BDEV_ZONE_OFFLINE:
8124 0 : return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
8125 : bdev_nvme_zone_management_done, bio);
8126 0 : default:
8127 0 : return -EINVAL;
8128 : }
8129 : }
8130 :
8131 : static void
8132 5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
8133 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
8134 : {
8135 : struct nvme_io_path *io_path;
8136 : struct nvme_ctrlr *nvme_ctrlr;
8137 : uint32_t max_xfer_size;
8138 5 : int rc = -ENXIO;
8139 :
8140 : /* Choose the first ctrlr which is not failed. */
8141 8 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
8142 7 : nvme_ctrlr = io_path->qpair->ctrlr;
8143 :
8144 : /* We should skip any unavailable nvme_ctrlr rather than checking
8145 : * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
8146 : */
8147 7 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
8148 3 : continue;
8149 : }
8150 :
8151 4 : max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
8152 :
8153 4 : if (nbytes > max_xfer_size) {
8154 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8155 0 : rc = -EINVAL;
8156 0 : goto err;
8157 : }
8158 :
8159 4 : rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
8160 : bdev_nvme_admin_passthru_done, bio);
8161 4 : if (rc == 0) {
8162 4 : return;
8163 : }
8164 : }
8165 :
8166 1 : err:
8167 1 : bdev_nvme_admin_complete(bio, rc);
8168 : }
8169 :
8170 : static int
8171 0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
8172 : void *buf, size_t nbytes)
8173 : {
8174 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8175 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8176 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8177 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8178 :
8179 0 : if (nbytes > max_xfer_size) {
8180 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8181 0 : return -EINVAL;
8182 : }
8183 :
8184 : /*
8185 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
8186 : * so fill it out automatically.
8187 : */
8188 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8189 :
8190 0 : return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
8191 : (uint32_t)nbytes, bdev_nvme_queued_done, bio);
8192 : }
8193 :
8194 : static int
8195 0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
8196 : void *buf, size_t nbytes, void *md_buf, size_t md_len)
8197 : {
8198 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8199 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8200 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
8201 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8202 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8203 :
8204 0 : if (nbytes > max_xfer_size) {
8205 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8206 0 : return -EINVAL;
8207 : }
8208 :
8209 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
8210 0 : SPDK_ERRLOG("invalid meta data buffer size\n");
8211 0 : return -EINVAL;
8212 : }
8213 :
8214 : /*
8215 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
8216 : * so fill it out automatically.
8217 : */
8218 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8219 :
8220 0 : return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
8221 : (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
8222 : }
8223 :
8224 : static int
8225 0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
8226 : struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
8227 : size_t nbytes, void *md_buf, size_t md_len)
8228 : {
8229 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8230 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8231 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
8232 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8233 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8234 :
8235 0 : bio->iovs = iov;
8236 0 : bio->iovcnt = iovcnt;
8237 0 : bio->iovpos = 0;
8238 0 : bio->iov_offset = 0;
8239 :
8240 0 : if (nbytes > max_xfer_size) {
8241 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8242 0 : return -EINVAL;
8243 : }
8244 :
8245 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
8246 0 : SPDK_ERRLOG("invalid meta data buffer size\n");
8247 0 : return -EINVAL;
8248 : }
8249 :
8250 : /*
8251 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
8252 : * require a nsid, so fill it out automatically.
8253 : */
8254 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8255 :
8256 0 : return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
8257 : ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
8258 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
8259 : }
8260 :
8261 : static void
8262 6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
8263 : struct nvme_bdev_io *bio_to_abort)
8264 : {
8265 : struct nvme_io_path *io_path;
8266 6 : int rc = 0;
8267 :
8268 6 : rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
8269 6 : if (rc == 0) {
8270 1 : bdev_nvme_admin_complete(bio, 0);
8271 1 : return;
8272 : }
8273 :
8274 5 : io_path = bio_to_abort->io_path;
8275 5 : if (io_path != NULL) {
8276 3 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8277 3 : io_path->qpair->qpair,
8278 : bio_to_abort,
8279 : bdev_nvme_abort_done, bio);
8280 : } else {
8281 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
8282 2 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8283 : NULL,
8284 : bio_to_abort,
8285 : bdev_nvme_abort_done, bio);
8286 :
8287 2 : if (rc != -ENOENT) {
8288 1 : break;
8289 : }
8290 : }
8291 : }
8292 :
8293 5 : if (rc != 0) {
8294 : /* If no command was found or there was any error, complete the abort
8295 : * request with failure.
8296 : */
8297 2 : bdev_nvme_admin_complete(bio, rc);
8298 : }
8299 : }
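/*
 * Editor's note: summary added for clarity. bdev_nvme_abort() first tries to
 * cancel the target I/O from the channel's retry queue; if it is not queued
 * there, an NVMe Abort command is sent to the qpair the I/O was submitted on,
 * or, when no I/O path is recorded, to each controller in turn until one does
 * not report the command as unknown. Any remaining error completes the abort
 * request as failed.
 */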
8300 :
8301 : static int
8302 0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
8303 : uint64_t num_blocks)
8304 : {
8305 0 : struct spdk_nvme_scc_source_range range = {
8306 : .slba = src_offset_blocks,
8307 0 : .nlb = num_blocks - 1
8308 : };
8309 :
8310 0 : return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
8311 0 : bio->io_path->qpair->qpair,
8312 : &range, 1, dst_offset_blocks,
8313 : bdev_nvme_queued_done, bio);
8314 : }
8315 :
8316 : static void
8317 0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
8318 : {
8319 : const char *action;
8320 : uint32_t i;
8321 :
8322 0 : if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
8323 0 : action = "reset";
8324 0 : } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
8325 0 : action = "abort";
8326 : } else {
8327 0 : action = "none";
8328 : }
8329 :
8330 0 : spdk_json_write_object_begin(w);
8331 :
8332 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
8333 :
8334 0 : spdk_json_write_named_object_begin(w, "params");
8335 0 : spdk_json_write_named_string(w, "action_on_timeout", action);
8336 0 : spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
8337 0 : spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
8338 0 : spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
8339 0 : spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
8340 0 : spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
8341 0 : spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
8342 0 : spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
8343 0 : spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
8344 0 : spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
8345 0 : spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
8346 0 : spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
8347 0 : spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
8348 0 : spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
8349 0 : spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
8350 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
8351 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
8352 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
8353 0 : spdk_json_write_named_bool(w, "disable_auto_failback", g_opts.disable_auto_failback);
8354 0 : spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
8355 0 : spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
8356 0 : spdk_json_write_named_bool(w, "nvme_error_stat", g_opts.nvme_error_stat);
8357 0 : spdk_json_write_named_uint32(w, "rdma_srq_size", g_opts.rdma_srq_size);
8358 0 : spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
8359 0 : spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
8360 0 : spdk_json_write_named_uint32(w, "rdma_max_cq_size", g_opts.rdma_max_cq_size);
8361 0 : spdk_json_write_named_uint16(w, "rdma_cm_event_timeout_ms", g_opts.rdma_cm_event_timeout_ms);
8362 0 : spdk_json_write_named_array_begin(w, "dhchap_digests");
8363 0 : for (i = 0; i < 32; ++i) {
8364 0 : if (g_opts.dhchap_digests & SPDK_BIT(i)) {
8365 0 : spdk_json_write_string(w, spdk_nvme_dhchap_get_digest_name(i));
8366 : }
8367 : }
8368 0 : spdk_json_write_array_end(w);
8369 0 : spdk_json_write_named_array_begin(w, "dhchap_dhgroups");
8370 0 : for (i = 0; i < 32; ++i) {
8371 0 : if (g_opts.dhchap_dhgroups & SPDK_BIT(i)) {
8372 0 : spdk_json_write_string(w, spdk_nvme_dhchap_get_dhgroup_name(i));
8373 : }
8374 : }
8375 :
8376 0 : spdk_json_write_array_end(w);
8377 0 : spdk_json_write_object_end(w);
8378 :
8379 0 : spdk_json_write_object_end(w);
8380 0 : }
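/*
 * Editor's note: illustrative sketch, not emitted by this code verbatim.
 * bdev_nvme_opts_config_json() serializes the current global options as a
 * replayable "bdev_nvme_set_options" RPC object, roughly of the form
 * (abridged, values illustrative):
 *
 *   {
 *     "method": "bdev_nvme_set_options",
 *     "params": {
 *       "action_on_timeout": "none",
 *       "timeout_us": 0,
 *       "keep_alive_timeout_ms": 10000,
 *       "dhchap_digests": [ ... ],
 *       "dhchap_dhgroups": [ ... ]
 *     }
 *   }
 */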
8381 :
8382 : static void
8383 0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
8384 : {
8385 0 : struct spdk_nvme_transport_id trid;
8386 :
8387 0 : spdk_json_write_object_begin(w);
8388 :
8389 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
8390 :
8391 0 : spdk_json_write_named_object_begin(w, "params");
8392 0 : spdk_json_write_named_string(w, "name", ctx->name);
8393 0 : spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
8394 :
8395 0 : trid = ctx->trid;
8396 0 : memset(trid.subnqn, 0, sizeof(trid.subnqn));
8397 0 : nvme_bdev_dump_trid_json(&trid, w);
8398 :
8399 0 : spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
8400 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
8401 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
8402 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8403 : ctx->bdev_opts.fast_io_fail_timeout_sec);
8404 0 : spdk_json_write_object_end(w);
8405 :
8406 0 : spdk_json_write_object_end(w);
8407 0 : }
8408 :
8409 : #ifdef SPDK_CONFIG_NVME_CUSE
8410 : static void
8411 0 : nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
8412 : struct nvme_ctrlr *nvme_ctrlr)
8413 0 : {
8414 0 : size_t cuse_name_size = 128;
8415 0 : char cuse_name[cuse_name_size];
8416 :
8417 0 : if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
8418 : cuse_name, &cuse_name_size) != 0) {
8419 0 : return;
8420 : }
8421 :
8422 0 : spdk_json_write_object_begin(w);
8423 :
8424 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");
8425 :
8426 0 : spdk_json_write_named_object_begin(w, "params");
8427 0 : spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
8428 0 : spdk_json_write_object_end(w);
8429 :
8430 0 : spdk_json_write_object_end(w);
8431 : }
8432 : #endif
8433 :
8434 : static void
8435 0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
8436 : struct nvme_ctrlr *nvme_ctrlr)
8437 : {
8438 : struct spdk_nvme_transport_id *trid;
8439 : const struct spdk_nvme_ctrlr_opts *opts;
8440 :
8441 0 : if (nvme_ctrlr->opts.from_discovery_service) {
8442 : /* Do not emit an RPC for this - it will be implicitly
8443 : * covered by a separate bdev_nvme_start_discovery or
8444 : * bdev_nvme_start_mdns_discovery RPC.
8445 : */
8446 0 : return;
8447 : }
8448 :
8449 0 : trid = &nvme_ctrlr->active_path_id->trid;
8450 :
8451 0 : spdk_json_write_object_begin(w);
8452 :
8453 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
8454 :
8455 0 : spdk_json_write_named_object_begin(w, "params");
8456 0 : spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
8457 0 : nvme_bdev_dump_trid_json(trid, w);
8458 0 : spdk_json_write_named_bool(w, "prchk_reftag",
8459 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
8460 0 : spdk_json_write_named_bool(w, "prchk_guard",
8461 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
8462 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
8463 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
8464 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8465 : nvme_ctrlr->opts.fast_io_fail_timeout_sec);
8466 0 : if (nvme_ctrlr->psk != NULL) {
8467 0 : spdk_json_write_named_string(w, "psk", spdk_key_get_name(nvme_ctrlr->psk));
8468 0 : } else if (nvme_ctrlr->opts.psk[0] != '\0') {
8469 0 : spdk_json_write_named_string(w, "psk", nvme_ctrlr->opts.psk);
8470 : }
8471 :
8472 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
8473 0 : spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
8474 0 : spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
8475 0 : spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
8476 0 : if (opts->src_addr[0] != '\0') {
8477 0 : spdk_json_write_named_string(w, "hostaddr", opts->src_addr);
8478 : }
8479 0 : if (opts->src_svcid[0] != '\0') {
8480 0 : spdk_json_write_named_string(w, "hostsvcid", opts->src_svcid);
8481 : }
8482 :
8483 0 : spdk_json_write_object_end(w);
8484 :
8485 0 : spdk_json_write_object_end(w);
8486 : }
8487 :
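 : /* Emit a "bdev_nvme_set_hotplug" RPC object with the current poll period and
 :  * enable flag.
 :  */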
8488 : static void
8489 0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
8490 : {
8491 0 : spdk_json_write_object_begin(w);
8492 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
8493 :
8494 0 : spdk_json_write_named_object_begin(w, "params");
8495 0 : spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
8496 0 : spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
8497 0 : spdk_json_write_object_end(w);
8498 :
8499 0 : spdk_json_write_object_end(w);
8500 0 : }
8501 :
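 : /* Module hook that dumps the full bdev_nvme configuration: global options,
 :  * every attached controller (plus CUSE registration when enabled), discovery
 :  * and mDNS discovery sessions, and finally the hotplug settings.
 :  */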
8502 : static int
8503 0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
8504 : {
8505 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
8506 : struct nvme_ctrlr *nvme_ctrlr;
8507 : struct discovery_ctx *ctx;
8508 :
8509 0 : bdev_nvme_opts_config_json(w);
8510 :
8511 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
8512 :
8513 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
8514 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
8515 0 : nvme_ctrlr_config_json(w, nvme_ctrlr);
8516 :
8517 : #ifdef SPDK_CONFIG_NVME_CUSE
8518 0 : nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
8519 : #endif
8520 : }
8521 : }
8522 :
8523 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
8524 0 : if (!ctx->from_mdns_discovery_service) {
8525 0 : bdev_nvme_discovery_config_json(w, ctx);
8526 : }
8527 : }
8528 :
8529 0 : bdev_nvme_mdns_discovery_config_json(w);
8530 :
8531 : /* Dump the hotplug configuration last so that all NVMe bdevs have a chance
8532 : * to be constructed before the hotplug poller is enabled.
8533 : */
8534 0 : bdev_nvme_hotplug_config_json(w);
8535 :
8536 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
8537 0 : return 0;
8538 : }
8539 :
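 : /* Return the spdk_nvme_ctrlr backing the first namespace of an NVMe bdev, or
 :  * NULL if the bdev does not belong to this module.
 :  */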
8540 : struct spdk_nvme_ctrlr *
8541 1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
8542 : {
8543 : struct nvme_bdev *nbdev;
8544 : struct nvme_ns *nvme_ns;
8545 :
8546 1 : if (!bdev || bdev->module != &nvme_if) {
8547 0 : return NULL;
8548 : }
8549 :
8550 1 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
8551 1 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
8552 1 : assert(nvme_ns != NULL);
8553 :
8554 1 : return nvme_ns->ctrlr->ctrlr;
8555 : }
8556 :
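 : /* Report whether an I/O path would be selected for new I/O: in active-active
 :  * mode any optimized path (or any path when none are optimized) is current;
 :  * in active-passive mode only the cached path, or the first available path
 :  * when none is cached, is current.
 :  */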
8557 : static bool
8558 12 : nvme_io_path_is_current(struct nvme_io_path *io_path)
8559 : {
8560 : const struct nvme_bdev_channel *nbdev_ch;
8561 : bool current;
8562 :
8563 12 : if (!nvme_io_path_is_available(io_path)) {
8564 4 : return false;
8565 : }
8566 :
8567 8 : nbdev_ch = io_path->nbdev_ch;
8568 8 : if (nbdev_ch == NULL) {
8569 1 : current = false;
8570 7 : } else if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
8571 3 : struct nvme_io_path *optimized_io_path = NULL;
8572 :
8573 6 : STAILQ_FOREACH(optimized_io_path, &nbdev_ch->io_path_list, stailq) {
8574 5 : if (optimized_io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) {
8575 2 : break;
8576 : }
8577 : }
8578 :
8579 : /* A non-optimized path is only current if there are no optimized paths. */
8580 3 : current = (io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) ||
8581 : (optimized_io_path == NULL);
8582 : } else {
8583 4 : if (nbdev_ch->current_io_path) {
8584 1 : current = (io_path == nbdev_ch->current_io_path);
8585 : } else {
8586 : struct nvme_io_path *first_path;
8587 :
8588 : /* We arrived here as there are no optimized paths for active-passive
8589 : * mode. Check if this io_path is the first one available on the list.
8590 : */
8591 3 : current = false;
8592 3 : STAILQ_FOREACH(first_path, &nbdev_ch->io_path_list, stailq) {
8593 3 : if (nvme_io_path_is_available(first_path)) {
8594 3 : current = (io_path == first_path);
8595 3 : break;
8596 : }
8597 : }
8598 : }
8599 : }
8600 :
8601 8 : return current;
8602 : }
8603 :
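 : /* Write one JSON object describing an I/O path: bdev name, controller ID,
 :  * current/connected/accessible state, and the transport ID it uses.
 :  */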
8604 : void
8605 0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
8606 : {
8607 0 : struct nvme_ns *nvme_ns = io_path->nvme_ns;
8608 0 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
8609 : const struct spdk_nvme_ctrlr_data *cdata;
8610 : const struct spdk_nvme_transport_id *trid;
8611 : const char *adrfam_str;
8612 :
8613 0 : spdk_json_write_object_begin(w);
8614 :
8615 0 : spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
8616 :
8617 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
8618 0 : trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
8619 :
8620 0 : spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
8621 0 : spdk_json_write_named_bool(w, "current", nvme_io_path_is_current(io_path));
8622 0 : spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
8623 0 : spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
8624 :
8625 0 : spdk_json_write_named_object_begin(w, "transport");
8626 0 : spdk_json_write_named_string(w, "trtype", trid->trstring);
8627 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
8628 0 : if (trid->trsvcid[0] != '\0') {
8629 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
8630 : }
8631 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
8632 0 : if (adrfam_str) {
8633 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
8634 : }
8635 0 : spdk_json_write_object_end(w);
8636 :
8637 0 : spdk_json_write_object_end(w);
8638 0 : }
8639 :
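 : /* Write a JSON array with one object per discovery context, including its
 :  * transport ID and the referral entries it is tracking.
 :  */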
8640 : void
8641 0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
8642 : {
8643 : struct discovery_ctx *ctx;
8644 : struct discovery_entry_ctx *entry_ctx;
8645 :
8646 0 : spdk_json_write_array_begin(w);
8647 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
8648 0 : spdk_json_write_object_begin(w);
8649 0 : spdk_json_write_named_string(w, "name", ctx->name);
8650 :
8651 0 : spdk_json_write_named_object_begin(w, "trid");
8652 0 : nvme_bdev_dump_trid_json(&ctx->trid, w);
8653 0 : spdk_json_write_object_end(w);
8654 :
8655 0 : spdk_json_write_named_array_begin(w, "referrals");
8656 0 : TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
8657 0 : spdk_json_write_object_begin(w);
8658 0 : spdk_json_write_named_object_begin(w, "trid");
8659 0 : nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
8660 0 : spdk_json_write_object_end(w);
8661 0 : spdk_json_write_object_end(w);
8662 : }
8663 0 : spdk_json_write_array_end(w);
8664 :
8665 0 : spdk_json_write_object_end(w);
8666 : }
8667 0 : spdk_json_write_array_end(w);
8668 0 : }
8669 :
8670 1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
8671 :
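 : /* Register the bdev_nvme tracepoints (I/O start/done) and relate them to the
 :  * NVMe PCIe and TCP submit/complete tracepoints so traces can be correlated.
 :  */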
8672 1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)
8673 : {
8674 0 : struct spdk_trace_tpoint_opts opts[] = {
8675 : {
8676 : "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
8677 : OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 1,
8678 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
8679 : },
8680 : {
8681 : "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
8682 : OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 0,
8683 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
8684 : }
8685 : };
8686 :
8687 :
8688 0 : spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
8689 0 : spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
8690 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
8691 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
8692 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
8693 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
8694 0 : }