Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation. All rights reserved.
3 : * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4 : * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : * Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
6 : */
7 :
8 : #include "spdk/stdinc.h"
9 :
10 : #include "bdev_nvme.h"
11 :
12 : #include "spdk/accel.h"
13 : #include "spdk/config.h"
14 : #include "spdk/endian.h"
15 : #include "spdk/bdev.h"
16 : #include "spdk/json.h"
17 : #include "spdk/likely.h"
18 : #include "spdk/nvme.h"
19 : #include "spdk/nvme_ocssd.h"
20 : #include "spdk/nvme_zns.h"
21 : #include "spdk/opal.h"
22 : #include "spdk/thread.h"
23 : #include "spdk/trace.h"
24 : #include "spdk/string.h"
25 : #include "spdk/util.h"
26 : #include "spdk/uuid.h"
27 :
28 : #include "spdk/bdev_module.h"
29 : #include "spdk/log.h"
30 :
31 : #include "spdk_internal/usdt.h"
32 : #include "spdk_internal/trace_defs.h"
33 :
34 : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
35 : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
36 :
37 : #define NSID_STR_LEN 10
38 :
39 : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
40 :
41 : struct nvme_bdev_io {
42 : /** array of iovecs to transfer. */
43 : struct iovec *iovs;
44 :
45 : /** Number of iovecs in iovs array. */
46 : int iovcnt;
47 :
48 : /** Current iovec position. */
49 : int iovpos;
50 :
51 : /** Offset in current iovec. */
52 : uint32_t iov_offset;
53 :
54 : /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
55 : * being reset in a reset I/O.
56 : */
57 : struct nvme_io_path *io_path;
58 :
59 : /** array of iovecs to transfer. */
60 : struct iovec *fused_iovs;
61 :
62 : /** Number of iovecs in iovs array. */
63 : int fused_iovcnt;
64 :
65 : /** Current iovec position. */
66 : int fused_iovpos;
67 :
68 : /** Offset in current iovec. */
69 : uint32_t fused_iov_offset;
70 :
71 : /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
72 : struct spdk_nvme_cpl cpl;
73 :
74 : /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
75 : struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
76 :
77 : /** Keeps track if first of fused commands was submitted */
78 : bool first_fused_submitted;
79 :
80 : /** Keeps track if first of fused commands was completed */
81 : bool first_fused_completed;
82 :
83 : /** Temporary pointer to zone report buffer */
84 : struct spdk_nvme_zns_zone_report *zone_report_buf;
85 :
86 : /** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
87 : uint64_t handled_zones;
88 :
89 : /** Expiration value in ticks to retry the current I/O. */
90 : uint64_t retry_ticks;
91 :
92 : /* How many times the current I/O was retried. */
93 : int32_t retry_count;
94 :
95 : /* Current tsc at submit time. */
96 : uint64_t submit_tsc;
97 : };
98 :
99 : struct nvme_probe_skip_entry {
100 : struct spdk_nvme_transport_id trid;
101 : TAILQ_ENTRY(nvme_probe_skip_entry) tailq;
102 : };
103 : /* All the controllers deleted by users via RPC are skipped by hotplug monitor */
104 : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
105 : g_skipped_nvme_ctrlrs);
106 :
107 : static struct spdk_bdev_nvme_opts g_opts = {
108 : .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
109 : .timeout_us = 0,
110 : .timeout_admin_us = 0,
111 : .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
112 : .transport_retry_count = 4,
113 : .arbitration_burst = 0,
114 : .low_priority_weight = 0,
115 : .medium_priority_weight = 0,
116 : .high_priority_weight = 0,
117 : .nvme_adminq_poll_period_us = 10000ULL,
118 : .nvme_ioq_poll_period_us = 0,
119 : .io_queue_requests = 0,
120 : .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
121 : .bdev_retry_count = 3,
122 : .transport_ack_timeout = 0,
123 : .ctrlr_loss_timeout_sec = 0,
124 : .reconnect_delay_sec = 0,
125 : .fast_io_fail_timeout_sec = 0,
126 : .disable_auto_failback = false,
127 : .generate_uuids = false,
128 : .transport_tos = 0,
129 : .nvme_error_stat = false,
130 : .io_path_stat = false,
131 : .allow_accel_sequence = false,
132 : };
133 :
134 : #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
135 : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL
136 :
137 : static int g_hot_insert_nvme_controller_index = 0;
138 : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
139 : static bool g_nvme_hotplug_enabled = false;
140 : struct spdk_thread *g_bdev_nvme_init_thread;
141 : static struct spdk_poller *g_hotplug_poller;
142 : static struct spdk_poller *g_hotplug_probe_poller;
143 : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
144 :
145 : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
146 : struct nvme_async_probe_ctx *ctx);
147 : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
148 : struct nvme_async_probe_ctx *ctx);
149 : static int bdev_nvme_library_init(void);
150 : static void bdev_nvme_library_fini(void);
151 : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
152 : struct spdk_bdev_io *bdev_io);
153 : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
154 : struct spdk_bdev_io *bdev_io);
155 : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
156 : void *md, uint64_t lba_count, uint64_t lba,
157 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
158 : struct spdk_accel_sequence *seq);
159 : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
160 : void *md, uint64_t lba_count, uint64_t lba);
161 : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
162 : void *md, uint64_t lba_count, uint64_t lba,
163 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
164 : struct spdk_accel_sequence *seq);
165 : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
166 : void *md, uint64_t lba_count,
167 : uint64_t zslba, uint32_t flags);
168 : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
169 : void *md, uint64_t lba_count, uint64_t lba,
170 : uint32_t flags);
171 : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
172 : struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
173 : int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
174 : uint32_t flags);
175 : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
176 : uint32_t num_zones, struct spdk_bdev_zone_info *info);
177 : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
178 : enum spdk_bdev_zone_action action);
179 : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
180 : struct nvme_bdev_io *bio,
181 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
182 : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
183 : void *buf, size_t nbytes);
184 : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
185 : void *buf, size_t nbytes, void *md_buf, size_t md_len);
186 : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
187 : struct iovec *iov, int iovcnt, size_t nbytes,
188 : void *md_buf, size_t md_len);
189 : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
190 : struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
191 : static void bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio);
192 : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
193 : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
194 : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
195 : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
196 :
197 : static struct nvme_ns *nvme_ns_alloc(void);
198 : static void nvme_ns_free(struct nvme_ns *ns);
199 :
200 : static int
201 173 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
202 : {
203 173 : return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
204 : }
205 :
206 895 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
207 :
208 : struct spdk_nvme_qpair *
209 1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
210 : {
211 : struct nvme_ctrlr_channel *ctrlr_ch;
212 :
213 1 : assert(ctrlr_io_ch != NULL);
214 :
215 1 : ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
216 :
217 1 : return ctrlr_ch->qpair->qpair;
218 : }
219 :
220 : static int
221 0 : bdev_nvme_get_ctx_size(void)
222 : {
223 0 : return sizeof(struct nvme_bdev_io);
224 : }
225 :
226 : static struct spdk_bdev_module nvme_if = {
227 : .name = "nvme",
228 : .async_fini = true,
229 : .module_init = bdev_nvme_library_init,
230 : .module_fini = bdev_nvme_library_fini,
231 : .config_json = bdev_nvme_config_json,
232 : .get_ctx_size = bdev_nvme_get_ctx_size,
233 :
234 : };
235 1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
236 :
237 : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
238 : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
239 : bool g_bdev_nvme_module_finish;
240 :
241 : struct nvme_bdev_ctrlr *
242 270 : nvme_bdev_ctrlr_get_by_name(const char *name)
243 : {
244 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
245 :
246 270 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
247 148 : if (strcmp(name, nbdev_ctrlr->name) == 0) {
248 148 : break;
249 : }
250 : }
251 :
252 270 : return nbdev_ctrlr;
253 : }
254 :
255 : static struct nvme_ctrlr *
256 58 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
257 : const struct spdk_nvme_transport_id *trid)
258 : {
259 : struct nvme_ctrlr *nvme_ctrlr;
260 :
261 99 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
262 74 : if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0) {
263 33 : break;
264 : }
265 : }
266 :
267 58 : return nvme_ctrlr;
268 : }
269 :
270 : struct nvme_ctrlr *
271 0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
272 : uint16_t cntlid)
273 : {
274 : struct nvme_ctrlr *nvme_ctrlr;
275 : const struct spdk_nvme_ctrlr_data *cdata;
276 :
277 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
278 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
279 0 : if (cdata->cntlid == cntlid) {
280 0 : break;
281 : }
282 : }
283 :
284 0 : return nvme_ctrlr;
285 : }
286 :
287 : static struct nvme_bdev *
288 72 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
289 : {
290 : struct nvme_bdev *bdev;
291 :
292 72 : pthread_mutex_lock(&g_bdev_nvme_mutex);
293 106 : TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
294 68 : if (bdev->nsid == nsid) {
295 34 : break;
296 : }
297 : }
298 72 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
299 :
300 72 : return bdev;
301 : }
302 :
303 : struct nvme_ns *
304 139 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
305 : {
306 139 : struct nvme_ns ns;
307 :
308 139 : assert(nsid > 0);
309 :
310 139 : ns.id = nsid;
311 139 : return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
312 : }
313 :
314 : struct nvme_ns *
315 152 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
316 : {
317 152 : return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
318 : }
319 :
320 : struct nvme_ns *
321 63 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
322 : {
323 63 : if (ns == NULL) {
324 0 : return NULL;
325 : }
326 :
327 63 : return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
328 : }
329 :
330 : static struct nvme_ctrlr *
331 51 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid)
332 : {
333 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
334 51 : struct nvme_ctrlr *nvme_ctrlr = NULL;
335 :
336 51 : pthread_mutex_lock(&g_bdev_nvme_mutex);
337 70 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
338 19 : nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid);
339 19 : if (nvme_ctrlr != NULL) {
340 0 : break;
341 : }
342 : }
343 51 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
344 :
345 51 : return nvme_ctrlr;
346 : }
347 :
348 : struct nvme_ctrlr *
349 71 : nvme_ctrlr_get_by_name(const char *name)
350 : {
351 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
352 71 : struct nvme_ctrlr *nvme_ctrlr = NULL;
353 :
354 71 : if (name == NULL) {
355 0 : return NULL;
356 : }
357 :
358 71 : pthread_mutex_lock(&g_bdev_nvme_mutex);
359 71 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
360 71 : if (nbdev_ctrlr != NULL) {
361 40 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
362 : }
363 71 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
364 :
365 71 : return nvme_ctrlr;
366 : }
367 :
368 : void
369 0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
370 : {
371 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
372 :
373 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
374 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
375 0 : fn(nbdev_ctrlr, ctx);
376 : }
377 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
378 0 : }
379 :
380 : void
381 0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
382 : {
383 : const char *trtype_str;
384 : const char *adrfam_str;
385 :
386 0 : trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
387 0 : if (trtype_str) {
388 0 : spdk_json_write_named_string(w, "trtype", trtype_str);
389 : }
390 :
391 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
392 0 : if (adrfam_str) {
393 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
394 : }
395 :
396 0 : if (trid->traddr[0] != '\0') {
397 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
398 : }
399 :
400 0 : if (trid->trsvcid[0] != '\0') {
401 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
402 : }
403 :
404 0 : if (trid->subnqn[0] != '\0') {
405 0 : spdk_json_write_named_string(w, "subnqn", trid->subnqn);
406 : }
407 0 : }
408 :
409 : static void
410 59 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
411 : struct nvme_ctrlr *nvme_ctrlr)
412 : {
413 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
414 59 : pthread_mutex_lock(&g_bdev_nvme_mutex);
415 :
416 59 : TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
417 59 : if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
418 15 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
419 :
420 15 : return;
421 : }
422 44 : TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
423 :
424 44 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
425 :
426 44 : assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));
427 :
428 44 : free(nbdev_ctrlr->name);
429 44 : free(nbdev_ctrlr);
430 : }
431 :
432 : static void
433 60 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
434 : {
435 : struct nvme_path_id *path_id, *tmp_path;
436 : struct nvme_ns *ns, *tmp_ns;
437 :
438 60 : free(nvme_ctrlr->copied_ana_desc);
439 60 : spdk_free(nvme_ctrlr->ana_log_page);
440 :
441 60 : if (nvme_ctrlr->opal_dev) {
442 0 : spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
443 0 : nvme_ctrlr->opal_dev = NULL;
444 : }
445 :
446 60 : if (nvme_ctrlr->nbdev_ctrlr) {
447 59 : nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
448 : }
449 :
450 60 : RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
451 0 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
452 0 : nvme_ns_free(ns);
453 : }
454 :
455 120 : TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
456 60 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
457 60 : free(path_id);
458 : }
459 :
460 60 : pthread_mutex_destroy(&nvme_ctrlr->mutex);
461 :
462 60 : free(nvme_ctrlr);
463 :
464 60 : pthread_mutex_lock(&g_bdev_nvme_mutex);
465 60 : if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
466 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
467 0 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
468 0 : spdk_bdev_module_fini_done();
469 0 : return;
470 : }
471 60 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
472 : }
473 :
474 : static int
475 60 : nvme_detach_poller(void *arg)
476 : {
477 60 : struct nvme_ctrlr *nvme_ctrlr = arg;
478 : int rc;
479 :
480 60 : rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
481 60 : if (rc != -EAGAIN) {
482 60 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
483 60 : _nvme_ctrlr_delete(nvme_ctrlr);
484 : }
485 :
486 60 : return SPDK_POLLER_BUSY;
487 : }
488 :
489 : static void
490 60 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
491 : {
492 : int rc;
493 :
494 60 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
495 :
496 : /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
497 60 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
498 :
499 : /* If we got here, the reset/detach poller cannot be active */
500 60 : assert(nvme_ctrlr->reset_detach_poller == NULL);
501 60 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
502 : nvme_ctrlr, 1000);
503 60 : if (nvme_ctrlr->reset_detach_poller == NULL) {
504 0 : SPDK_ERRLOG("Failed to register detach poller\n");
505 0 : goto error;
506 : }
507 :
508 60 : rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
509 60 : if (rc != 0) {
510 0 : SPDK_ERRLOG("Failed to detach the NVMe controller\n");
511 0 : goto error;
512 : }
513 :
514 60 : return;
515 0 : error:
516 : /* We don't have a good way to handle errors here, so just do what we can and delete the
517 : * controller without detaching the underlying NVMe device.
518 : */
519 0 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
520 0 : _nvme_ctrlr_delete(nvme_ctrlr);
521 : }
522 :
523 : static void
524 59 : nvme_ctrlr_unregister_cb(void *io_device)
525 : {
526 59 : struct nvme_ctrlr *nvme_ctrlr = io_device;
527 :
528 59 : nvme_ctrlr_delete(nvme_ctrlr);
529 59 : }
530 :
531 : static void
532 59 : nvme_ctrlr_unregister(void *ctx)
533 : {
534 59 : struct nvme_ctrlr *nvme_ctrlr = ctx;
535 :
536 59 : spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
537 59 : }
538 :
539 : static bool
540 220 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
541 : {
542 220 : if (!nvme_ctrlr->destruct) {
543 105 : return false;
544 : }
545 :
546 115 : if (nvme_ctrlr->ref > 0) {
547 56 : return false;
548 : }
549 :
550 59 : if (nvme_ctrlr->resetting) {
551 0 : return false;
552 : }
553 :
554 59 : if (nvme_ctrlr->ana_log_page_updating) {
555 0 : return false;
556 : }
557 :
558 59 : if (nvme_ctrlr->io_path_cache_clearing) {
559 0 : return false;
560 : }
561 :
562 59 : return true;
563 : }
564 :
565 : static void
566 164 : nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
567 : {
568 164 : pthread_mutex_lock(&nvme_ctrlr->mutex);
569 : SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
570 :
571 164 : assert(nvme_ctrlr->ref > 0);
572 164 : nvme_ctrlr->ref--;
573 :
574 164 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
575 105 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
576 105 : return;
577 : }
578 :
579 59 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
580 :
581 59 : spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
582 : }
583 :
584 : static void
585 161 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
586 : {
587 161 : nbdev_ch->current_io_path = NULL;
588 161 : nbdev_ch->rr_counter = 0;
589 161 : }
590 :
591 : static struct nvme_io_path *
592 8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
593 : {
594 : struct nvme_io_path *io_path;
595 :
596 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
597 15 : if (io_path->nvme_ns == nvme_ns) {
598 7 : break;
599 : }
600 : }
601 :
602 8 : return io_path;
603 : }
604 :
605 : static struct nvme_io_path *
606 35 : nvme_io_path_alloc(void)
607 : {
608 : struct nvme_io_path *io_path;
609 :
610 35 : io_path = calloc(1, sizeof(*io_path));
611 35 : if (io_path == NULL) {
612 0 : SPDK_ERRLOG("Failed to alloc io_path.\n");
613 0 : return NULL;
614 : }
615 :
616 35 : if (g_opts.io_path_stat) {
617 0 : io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
618 0 : if (io_path->stat == NULL) {
619 0 : free(io_path);
620 0 : SPDK_ERRLOG("Failed to alloc io_path stat.\n");
621 0 : return NULL;
622 : }
623 0 : spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
624 : }
625 :
626 35 : return io_path;
627 : }
628 :
629 : static void
630 35 : nvme_io_path_free(struct nvme_io_path *io_path)
631 : {
632 35 : free(io_path->stat);
633 35 : free(io_path);
634 35 : }
635 :
636 : static int
637 35 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
638 : {
639 : struct nvme_io_path *io_path;
640 : struct spdk_io_channel *ch;
641 : struct nvme_ctrlr_channel *ctrlr_ch;
642 : struct nvme_qpair *nvme_qpair;
643 :
644 35 : io_path = nvme_io_path_alloc();
645 35 : if (io_path == NULL) {
646 0 : return -ENOMEM;
647 : }
648 :
649 35 : io_path->nvme_ns = nvme_ns;
650 :
651 35 : ch = spdk_get_io_channel(nvme_ns->ctrlr);
652 35 : if (ch == NULL) {
653 0 : nvme_io_path_free(io_path);
654 0 : SPDK_ERRLOG("Failed to alloc io_channel.\n");
655 0 : return -ENOMEM;
656 : }
657 :
658 35 : ctrlr_ch = spdk_io_channel_get_ctx(ch);
659 :
660 35 : nvme_qpair = ctrlr_ch->qpair;
661 35 : assert(nvme_qpair != NULL);
662 :
663 35 : io_path->qpair = nvme_qpair;
664 35 : TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);
665 :
666 35 : io_path->nbdev_ch = nbdev_ch;
667 35 : STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);
668 :
669 35 : bdev_nvme_clear_current_io_path(nbdev_ch);
670 :
671 35 : return 0;
672 : }
673 :
674 : static void
675 35 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
676 : struct nvme_io_path *io_path)
677 : {
678 : struct spdk_bdev_io *bdev_io;
679 : struct nvme_bdev_io *bio;
680 :
681 36 : TAILQ_FOREACH(bdev_io, &nbdev_ch->retry_io_list, module_link) {
682 1 : bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
683 1 : if (bio->io_path == io_path) {
684 1 : bio->io_path = NULL;
685 : }
686 : }
687 35 : }
688 :
689 : static void
690 35 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
691 : {
692 : struct spdk_io_channel *ch;
693 : struct nvme_qpair *nvme_qpair;
694 : struct nvme_ctrlr_channel *ctrlr_ch;
695 : struct nvme_bdev *nbdev;
696 :
697 35 : nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));
698 :
699 : /* Add the statistics to nvme_ns before this path is destroyed. */
700 35 : pthread_mutex_lock(&nbdev->mutex);
701 35 : if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
702 0 : spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
703 : }
704 35 : pthread_mutex_unlock(&nbdev->mutex);
705 :
706 35 : bdev_nvme_clear_current_io_path(nbdev_ch);
707 35 : bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);
708 :
709 35 : STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
710 35 : io_path->nbdev_ch = NULL;
711 :
712 35 : nvme_qpair = io_path->qpair;
713 35 : assert(nvme_qpair != NULL);
714 :
715 35 : ctrlr_ch = nvme_qpair->ctrlr_ch;
716 35 : assert(ctrlr_ch != NULL);
717 :
718 35 : ch = spdk_io_channel_from_ctx(ctrlr_ch);
719 35 : spdk_put_io_channel(ch);
720 :
721 : /* After an io_path is removed, I/Os submitted to it may complete and update statistics
722 : * of the io_path. To avoid heap-use-after-free error from this case, do not free the
723 : * io_path here but free the io_path when the associated qpair is freed. It is ensured
724 : * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
725 : */
726 35 : }
727 :
728 : static void
729 22 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
730 : {
731 : struct nvme_io_path *io_path, *tmp_io_path;
732 :
733 55 : STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
734 33 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
735 : }
736 22 : }
737 :
738 : static int
739 22 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
740 : {
741 22 : struct nvme_bdev_channel *nbdev_ch = ctx_buf;
742 22 : struct nvme_bdev *nbdev = io_device;
743 : struct nvme_ns *nvme_ns;
744 : int rc;
745 :
746 22 : STAILQ_INIT(&nbdev_ch->io_path_list);
747 22 : TAILQ_INIT(&nbdev_ch->retry_io_list);
748 :
749 22 : pthread_mutex_lock(&nbdev->mutex);
750 :
751 22 : nbdev_ch->mp_policy = nbdev->mp_policy;
752 22 : nbdev_ch->mp_selector = nbdev->mp_selector;
753 22 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
754 :
755 55 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
756 33 : rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
757 33 : if (rc != 0) {
758 0 : pthread_mutex_unlock(&nbdev->mutex);
759 :
760 0 : _bdev_nvme_delete_io_paths(nbdev_ch);
761 0 : return rc;
762 : }
763 : }
764 22 : pthread_mutex_unlock(&nbdev->mutex);
765 :
766 22 : return 0;
767 : }
768 :
769 : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
770 : * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
771 : */
772 : static inline void
773 47 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
774 : const struct spdk_nvme_cpl *cpl)
775 : {
776 47 : spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
777 : (uintptr_t)bdev_io);
778 47 : if (cpl) {
779 29 : spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
780 : } else {
781 18 : spdk_bdev_io_complete(bdev_io, status);
782 : }
783 47 : }
784 :
785 : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
786 :
787 : static void
788 22 : bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
789 : {
790 22 : struct nvme_bdev_channel *nbdev_ch = ctx_buf;
791 :
792 22 : bdev_nvme_abort_retry_ios(nbdev_ch);
793 22 : _bdev_nvme_delete_io_paths(nbdev_ch);
794 22 : }
795 :
796 : static inline bool
797 58 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
798 : {
799 58 : switch (io_type) {
800 5 : case SPDK_BDEV_IO_TYPE_RESET:
801 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
802 : case SPDK_BDEV_IO_TYPE_ABORT:
803 5 : return true;
804 53 : default:
805 53 : break;
806 : }
807 :
808 53 : return false;
809 : }
810 :
811 : static inline bool
812 77 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
813 : {
814 77 : if (spdk_unlikely(nvme_ns->ana_state_updating)) {
815 1 : return false;
816 : }
817 :
818 76 : if (spdk_unlikely(nvme_ns->ns == NULL)) {
819 0 : return false;
820 : }
821 :
822 76 : return true;
823 : }
824 :
825 : static inline bool
826 8 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
827 : {
828 8 : if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
829 0 : return false;
830 : }
831 :
832 8 : switch (nvme_ns->ana_state) {
833 8 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
834 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
835 8 : return true;
836 0 : default:
837 0 : break;
838 : }
839 :
840 0 : return false;
841 : }
842 :
843 : static inline bool
844 102 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
845 : {
846 102 : if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
847 20 : return false;
848 : }
849 :
850 82 : if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
851 : SPDK_NVME_QPAIR_FAILURE_NONE)) {
852 0 : return false;
853 : }
854 :
855 82 : if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
856 0 : return false;
857 : }
858 :
859 82 : return true;
860 : }
861 :
862 : static inline bool
863 8 : nvme_io_path_is_available(struct nvme_io_path *io_path)
864 : {
865 8 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
866 0 : return false;
867 : }
868 :
869 8 : if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
870 0 : return false;
871 : }
872 :
873 8 : return true;
874 : }
875 :
876 : static inline bool
877 8 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
878 : {
879 8 : if (nvme_ctrlr->destruct) {
880 0 : return true;
881 : }
882 :
883 8 : if (nvme_ctrlr->fast_io_fail_timedout) {
884 2 : return true;
885 : }
886 :
887 6 : if (nvme_ctrlr->resetting) {
888 4 : if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
889 4 : return false;
890 : } else {
891 0 : return true;
892 : }
893 : }
894 :
895 2 : if (nvme_ctrlr->reconnect_is_delayed) {
896 2 : return false;
897 : }
898 :
899 0 : if (nvme_ctrlr->disabled) {
900 0 : return true;
901 : }
902 :
903 0 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
904 0 : return true;
905 : } else {
906 0 : return false;
907 : }
908 : }
909 :
910 : static bool
911 20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
912 : {
913 20 : if (nvme_ctrlr->destruct) {
914 0 : return false;
915 : }
916 :
917 20 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
918 3 : return false;
919 : }
920 :
921 17 : if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
922 1 : return false;
923 : }
924 :
925 16 : if (nvme_ctrlr->disabled) {
926 0 : return false;
927 : }
928 :
929 16 : return true;
930 : }
931 :
932 : /* Simulate circular linked list. */
933 : static inline struct nvme_io_path *
934 87 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
935 : {
936 : struct nvme_io_path *next_path;
937 :
938 87 : if (prev_path != NULL) {
939 37 : next_path = STAILQ_NEXT(prev_path, stailq);
940 37 : if (next_path != NULL) {
941 14 : return next_path;
942 : }
943 : }
944 :
945 73 : return STAILQ_FIRST(&nbdev_ch->io_path_list);
946 : }
947 :
948 : static struct nvme_io_path *
949 57 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
950 : {
951 57 : struct nvme_io_path *io_path, *start, *non_optimized = NULL;
952 :
953 57 : start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
954 :
955 57 : io_path = start;
956 : do {
957 69 : if (spdk_likely(nvme_qpair_is_connected(io_path->qpair) &&
958 : nvme_ns_is_active(io_path->nvme_ns))) {
959 56 : switch (io_path->nvme_ns->ana_state) {
960 39 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
961 39 : nbdev_ch->current_io_path = io_path;
962 39 : return io_path;
963 10 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
964 10 : if (non_optimized == NULL) {
965 7 : non_optimized = io_path;
966 : }
967 10 : break;
968 7 : default:
969 7 : break;
970 : }
971 : }
972 30 : io_path = nvme_io_path_get_next(nbdev_ch, io_path);
973 30 : } while (io_path != start);
974 :
975 18 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
976 : /* We come here only if there is no optimized path. Cache even non_optimized
977 : * path for load balance across multiple non_optimized paths.
978 : */
979 1 : nbdev_ch->current_io_path = non_optimized;
980 : }
981 :
982 18 : return non_optimized;
983 : }
984 :
985 : static struct nvme_io_path *
986 4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
987 : {
988 : struct nvme_io_path *io_path;
989 4 : struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
990 4 : uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
991 : uint32_t num_outstanding_reqs;
992 :
993 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
994 12 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
995 : /* The device is currently resetting. */
996 0 : continue;
997 : }
998 :
999 12 : if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
1000 0 : continue;
1001 : }
1002 :
1003 12 : num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
1004 12 : switch (io_path->nvme_ns->ana_state) {
1005 6 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1006 6 : if (num_outstanding_reqs < opt_min_qd) {
1007 5 : opt_min_qd = num_outstanding_reqs;
1008 5 : optimized = io_path;
1009 : }
1010 6 : break;
1011 3 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1012 3 : if (num_outstanding_reqs < non_opt_min_qd) {
1013 3 : non_opt_min_qd = num_outstanding_reqs;
1014 3 : non_optimized = io_path;
1015 : }
1016 3 : break;
1017 3 : default:
1018 3 : break;
1019 : }
1020 : }
1021 :
1022 : /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
1023 4 : if (optimized != NULL) {
1024 3 : return optimized;
1025 : }
1026 :
1027 1 : return non_optimized;
1028 : }
1029 :
1030 : static inline struct nvme_io_path *
1031 95 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
1032 : {
1033 95 : if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
1034 41 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
1035 31 : return nbdev_ch->current_io_path;
1036 10 : } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1037 10 : if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
1038 3 : return nbdev_ch->current_io_path;
1039 : }
1040 7 : nbdev_ch->rr_counter = 0;
1041 : }
1042 : }
1043 :
1044 61 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
1045 14 : nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1046 57 : return _bdev_nvme_find_io_path(nbdev_ch);
1047 : } else {
1048 4 : return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
1049 : }
1050 : }
1051 :
1052 : /* Return true if there is any io_path whose qpair is active or ctrlr is not failed,
1053 : * or false otherwise.
1054 : *
1055 : * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
1056 : * is likely to be non-accessible now but may become accessible.
1057 : *
1058 : * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
1059 : * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
1060 : * when starting to reset it but it is set to failed when the reset failed. Hence, if
1061 : * a ctrlr is unfailed, it is likely that it works fine or is resetting.
1062 : */
1063 : static bool
1064 13 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
1065 : {
1066 : struct nvme_io_path *io_path;
1067 :
1068 15 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1069 13 : if (io_path->nvme_ns->ana_transition_timedout) {
1070 0 : continue;
1071 : }
1072 :
1073 13 : if (nvme_qpair_is_connected(io_path->qpair) ||
1074 8 : !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
1075 11 : return true;
1076 : }
1077 : }
1078 :
1079 2 : return false;
1080 : }
1081 :
1082 : static void
1083 14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
1084 : {
1085 14 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1086 : struct spdk_io_channel *ch;
1087 :
1088 14 : if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
1089 3 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
1090 : } else {
1091 11 : ch = spdk_io_channel_from_ctx(nbdev_ch);
1092 11 : bdev_nvme_submit_request(ch, bdev_io);
1093 : }
1094 14 : }
1095 :
1096 : static int
1097 14 : bdev_nvme_retry_ios(void *arg)
1098 : {
1099 14 : struct nvme_bdev_channel *nbdev_ch = arg;
1100 : struct spdk_bdev_io *bdev_io, *tmp_bdev_io;
1101 : struct nvme_bdev_io *bio;
1102 : uint64_t now, delay_us;
1103 :
1104 14 : now = spdk_get_ticks();
1105 :
1106 28 : TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_bdev_io) {
1107 15 : bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1108 15 : if (bio->retry_ticks > now) {
1109 1 : break;
1110 : }
1111 :
1112 14 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
1113 :
1114 14 : bdev_nvme_retry_io(nbdev_ch, bdev_io);
1115 : }
1116 :
1117 14 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1118 :
1119 14 : bdev_io = TAILQ_FIRST(&nbdev_ch->retry_io_list);
1120 14 : if (bdev_io != NULL) {
1121 4 : bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1122 :
1123 4 : delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
1124 :
1125 4 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1126 : delay_us);
1127 : }
1128 :
1129 14 : return SPDK_POLLER_BUSY;
1130 : }
1131 :
1132 : static void
1133 15 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
1134 : struct nvme_bdev_io *bio, uint64_t delay_ms)
1135 : {
1136 15 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1137 : struct spdk_bdev_io *tmp_bdev_io;
1138 : struct nvme_bdev_io *tmp_bio;
1139 :
1140 15 : bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
1141 :
1142 15 : TAILQ_FOREACH_REVERSE(tmp_bdev_io, &nbdev_ch->retry_io_list, retry_io_head, module_link) {
1143 1 : tmp_bio = (struct nvme_bdev_io *)tmp_bdev_io->driver_ctx;
1144 :
1145 1 : if (tmp_bio->retry_ticks <= bio->retry_ticks) {
1146 1 : TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bdev_io, bdev_io,
1147 : module_link);
1148 1 : return;
1149 : }
1150 : }
1151 :
1152 : /* No earlier I/Os were found. This I/O must be the new head. */
1153 14 : TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bdev_io, module_link);
1154 :
1155 14 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1156 :
1157 14 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1158 : delay_ms * 1000ULL);
1159 : }
1160 :
1161 : static void
1162 30 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
1163 : {
1164 : struct spdk_bdev_io *bdev_io, *tmp_io;
1165 :
1166 30 : TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_io) {
1167 0 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
1168 0 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1169 : }
1170 :
1171 30 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1172 30 : }
1173 :
1174 : static int
1175 6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
1176 : struct nvme_bdev_io *bio_to_abort)
1177 : {
1178 : struct spdk_bdev_io *bdev_io_to_abort;
1179 :
1180 6 : TAILQ_FOREACH(bdev_io_to_abort, &nbdev_ch->retry_io_list, module_link) {
1181 1 : if ((struct nvme_bdev_io *)bdev_io_to_abort->driver_ctx == bio_to_abort) {
1182 1 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io_to_abort, module_link);
1183 1 : __bdev_nvme_io_complete(bdev_io_to_abort, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1184 1 : return 0;
1185 : }
1186 : }
1187 :
1188 5 : return -ENOENT;
1189 : }
1190 :
1191 : static void
1192 12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
1193 : {
1194 : struct nvme_bdev *nbdev;
1195 : uint16_t sct, sc;
1196 :
1197 12 : assert(spdk_nvme_cpl_is_error(cpl));
1198 :
1199 12 : nbdev = bdev_io->bdev->ctxt;
1200 :
1201 12 : if (nbdev->err_stat == NULL) {
1202 12 : return;
1203 : }
1204 :
1205 0 : sct = cpl->status.sct;
1206 0 : sc = cpl->status.sc;
1207 :
1208 0 : pthread_mutex_lock(&nbdev->mutex);
1209 :
1210 0 : nbdev->err_stat->status_type[sct]++;
1211 0 : switch (sct) {
1212 0 : case SPDK_NVME_SCT_GENERIC:
1213 : case SPDK_NVME_SCT_COMMAND_SPECIFIC:
1214 : case SPDK_NVME_SCT_MEDIA_ERROR:
1215 : case SPDK_NVME_SCT_PATH:
1216 0 : nbdev->err_stat->status[sct][sc]++;
1217 0 : break;
1218 0 : default:
1219 0 : break;
1220 : }
1221 :
1222 0 : pthread_mutex_unlock(&nbdev->mutex);
1223 : }
1224 :
1225 : static inline void
1226 20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
1227 : {
1228 20 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1229 20 : uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
1230 20 : uint32_t blocklen = bdev_io->bdev->blocklen;
1231 : struct spdk_bdev_io_stat *stat;
1232 : uint64_t tsc_diff;
1233 :
1234 20 : if (bio->io_path->stat == NULL) {
1235 20 : return;
1236 : }
1237 :
1238 0 : tsc_diff = spdk_get_ticks() - bio->submit_tsc;
1239 0 : stat = bio->io_path->stat;
1240 :
1241 0 : switch (bdev_io->type) {
1242 0 : case SPDK_BDEV_IO_TYPE_READ:
1243 0 : stat->bytes_read += num_blocks * blocklen;
1244 0 : stat->num_read_ops++;
1245 0 : stat->read_latency_ticks += tsc_diff;
1246 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1247 0 : stat->max_read_latency_ticks = tsc_diff;
1248 : }
1249 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1250 0 : stat->min_read_latency_ticks = tsc_diff;
1251 : }
1252 0 : break;
1253 0 : case SPDK_BDEV_IO_TYPE_WRITE:
1254 0 : stat->bytes_written += num_blocks * blocklen;
1255 0 : stat->num_write_ops++;
1256 0 : stat->write_latency_ticks += tsc_diff;
1257 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1258 0 : stat->max_write_latency_ticks = tsc_diff;
1259 : }
1260 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1261 0 : stat->min_write_latency_ticks = tsc_diff;
1262 : }
1263 0 : break;
1264 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
1265 0 : stat->bytes_unmapped += num_blocks * blocklen;
1266 0 : stat->num_unmap_ops++;
1267 0 : stat->unmap_latency_ticks += tsc_diff;
1268 0 : if (stat->max_unmap_latency_ticks < tsc_diff) {
1269 0 : stat->max_unmap_latency_ticks = tsc_diff;
1270 : }
1271 0 : if (stat->min_unmap_latency_ticks > tsc_diff) {
1272 0 : stat->min_unmap_latency_ticks = tsc_diff;
1273 : }
1274 0 : break;
1275 0 : case SPDK_BDEV_IO_TYPE_ZCOPY:
1276 : /* Track the data in the start phase only */
1277 0 : if (!bdev_io->u.bdev.zcopy.start) {
1278 0 : break;
1279 : }
1280 0 : if (bdev_io->u.bdev.zcopy.populate) {
1281 0 : stat->bytes_read += num_blocks * blocklen;
1282 0 : stat->num_read_ops++;
1283 0 : stat->read_latency_ticks += tsc_diff;
1284 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1285 0 : stat->max_read_latency_ticks = tsc_diff;
1286 : }
1287 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1288 0 : stat->min_read_latency_ticks = tsc_diff;
1289 : }
1290 : } else {
1291 0 : stat->bytes_written += num_blocks * blocklen;
1292 0 : stat->num_write_ops++;
1293 0 : stat->write_latency_ticks += tsc_diff;
1294 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1295 0 : stat->max_write_latency_ticks = tsc_diff;
1296 : }
1297 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1298 0 : stat->min_write_latency_ticks = tsc_diff;
1299 : }
1300 : }
1301 0 : break;
1302 0 : case SPDK_BDEV_IO_TYPE_COPY:
1303 0 : stat->bytes_copied += num_blocks * blocklen;
1304 0 : stat->num_copy_ops++;
1305 0 : stat->copy_latency_ticks += tsc_diff;
1306 0 : if (stat->max_copy_latency_ticks < tsc_diff) {
1307 0 : stat->max_copy_latency_ticks = tsc_diff;
1308 : }
1309 0 : if (stat->min_copy_latency_ticks > tsc_diff) {
1310 0 : stat->min_copy_latency_ticks = tsc_diff;
1311 : }
1312 0 : break;
1313 0 : default:
1314 0 : break;
1315 : }
1316 : }
1317 :
1318 : static bool
1319 7 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
1320 : const struct spdk_nvme_cpl *cpl,
1321 : struct nvme_bdev_channel *nbdev_ch,
1322 : uint64_t *_delay_ms)
1323 : {
1324 7 : struct nvme_io_path *io_path = bio->io_path;
1325 7 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
1326 : const struct spdk_nvme_ctrlr_data *cdata;
1327 :
1328 7 : if (spdk_nvme_cpl_is_path_error(cpl) ||
1329 5 : spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
1330 4 : !nvme_io_path_is_available(io_path) ||
1331 4 : !nvme_ctrlr_is_available(nvme_ctrlr)) {
1332 3 : bdev_nvme_clear_current_io_path(nbdev_ch);
1333 3 : bio->io_path = NULL;
1334 3 : if (spdk_nvme_cpl_is_ana_error(cpl)) {
1335 1 : if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
1336 1 : io_path->nvme_ns->ana_state_updating = true;
1337 : }
1338 : }
1339 3 : if (!any_io_path_may_become_available(nbdev_ch)) {
1340 0 : return false;
1341 : }
1342 3 : *_delay_ms = 0;
1343 : } else {
1344 4 : bio->retry_count++;
1345 :
1346 4 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
1347 :
1348 4 : if (cpl->status.crd != 0) {
1349 1 : *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
1350 : } else {
1351 3 : *_delay_ms = 0;
1352 : }
1353 : }
1354 :
1355 7 : return true;
1356 : }
1357 :
1358 : static inline void
1359 32 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
1360 : const struct spdk_nvme_cpl *cpl)
1361 : {
1362 32 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1363 : struct nvme_bdev_channel *nbdev_ch;
1364 32 : uint64_t delay_ms;
1365 :
1366 32 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1367 :
1368 32 : if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
1369 20 : bdev_nvme_update_io_path_stat(bio);
1370 20 : goto complete;
1371 : }
1372 :
1373 : /* Update error counts before deciding if retry is needed.
1374 : * Hence, error counts may be more than the number of I/O errors.
1375 : */
1376 12 : bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
1377 :
1378 12 : if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
1379 8 : (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
1380 5 : goto complete;
1381 : }
1382 :
1383 : /* At this point we don't know whether the sequence was successfully executed or not, so we
1384 : * cannot retry the IO */
1385 7 : if (bdev_io->u.bdev.accel_sequence != NULL) {
1386 0 : goto complete;
1387 : }
1388 :
1389 7 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1390 :
1391 7 : if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
1392 7 : bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
1393 7 : return;
1394 : }
1395 :
1396 25 : complete:
1397 25 : bio->retry_count = 0;
1398 25 : bio->submit_tsc = 0;
1399 25 : bdev_io->u.bdev.accel_sequence = NULL;
1400 25 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
1401 : }
1402 :
1403 : static inline void
1404 11 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
1405 : {
1406 11 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1407 : struct nvme_bdev_channel *nbdev_ch;
1408 : enum spdk_bdev_io_status io_status;
1409 :
1410 11 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1411 :
1412 11 : switch (rc) {
1413 1 : case 0:
1414 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1415 1 : break;
1416 0 : case -ENOMEM:
1417 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1418 0 : break;
1419 10 : case -ENXIO:
1420 10 : if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
1421 10 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1422 :
1423 10 : bdev_nvme_clear_current_io_path(nbdev_ch);
1424 10 : bio->io_path = NULL;
1425 :
1426 10 : if (any_io_path_may_become_available(nbdev_ch)) {
1427 8 : bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
1428 8 : return;
1429 : }
1430 : }
1431 :
1432 : /* fallthrough */
1433 : default:
1434 2 : spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
1435 2 : bdev_io->u.bdev.accel_sequence = NULL;
1436 2 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1437 2 : break;
1438 : }
1439 :
1440 3 : bio->retry_count = 0;
1441 3 : bio->submit_tsc = 0;
1442 3 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1443 : }
1444 :
1445 : static inline void
1446 4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
1447 : {
1448 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1449 : enum spdk_bdev_io_status io_status;
1450 :
1451 4 : switch (rc) {
1452 1 : case 0:
1453 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1454 1 : break;
1455 0 : case -ENOMEM:
1456 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1457 0 : break;
1458 3 : case -ENXIO:
1459 : /* fallthrough */
1460 : default:
1461 3 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1462 3 : break;
1463 : }
1464 :
1465 4 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1466 4 : }
1467 :
1468 : static void
1469 3 : bdev_nvme_clear_io_path_caches_done(struct spdk_io_channel_iter *i, int status)
1470 : {
1471 3 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
1472 :
1473 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1474 :
1475 3 : assert(nvme_ctrlr->io_path_cache_clearing == true);
1476 3 : nvme_ctrlr->io_path_cache_clearing = false;
1477 :
1478 3 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
1479 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1480 3 : return;
1481 : }
1482 :
1483 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1484 :
1485 0 : nvme_ctrlr_unregister(nvme_ctrlr);
1486 : }
1487 :
1488 : static void
1489 320 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
1490 : {
1491 : struct nvme_io_path *io_path;
1492 :
1493 459 : TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
1494 139 : if (io_path->nbdev_ch == NULL) {
1495 64 : continue;
1496 : }
1497 75 : bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
1498 : }
1499 320 : }
1500 :
1501 : static void
1502 1 : bdev_nvme_clear_io_path_cache(struct spdk_io_channel_iter *i)
1503 : {
1504 1 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
1505 1 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
1506 :
1507 1 : assert(ctrlr_ch->qpair != NULL);
1508 :
1509 1 : _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
1510 :
1511 1 : spdk_for_each_channel_continue(i, 0);
1512 1 : }
1513 :
1514 : static void
1515 3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
1516 : {
1517 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1518 3 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
1519 : nvme_ctrlr->io_path_cache_clearing) {
1520 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1521 0 : return;
1522 : }
1523 :
1524 3 : nvme_ctrlr->io_path_cache_clearing = true;
1525 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1526 :
1527 3 : spdk_for_each_channel(nvme_ctrlr,
1528 : bdev_nvme_clear_io_path_cache,
1529 : NULL,
1530 : bdev_nvme_clear_io_path_caches_done);
1531 : }
1532 :
1533 : static struct nvme_qpair *
1534 99 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
1535 : {
1536 : struct nvme_qpair *nvme_qpair;
1537 :
1538 108 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1539 108 : if (nvme_qpair->qpair == qpair) {
1540 99 : break;
1541 : }
1542 : }
1543 :
1544 99 : return nvme_qpair;
1545 : }
1546 :
1547 : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
1548 :
1549 : static void
1550 99 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
1551 : {
1552 99 : struct nvme_poll_group *group = poll_group_ctx;
1553 : struct nvme_qpair *nvme_qpair;
1554 : struct nvme_ctrlr_channel *ctrlr_ch;
1555 : int status;
1556 :
1557 99 : nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
1558 99 : if (nvme_qpair == NULL) {
1559 0 : return;
1560 : }
1561 :
1562 99 : if (nvme_qpair->qpair != NULL) {
1563 99 : spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
1564 99 : nvme_qpair->qpair = NULL;
1565 : }
1566 :
1567 99 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1568 :
1569 99 : ctrlr_ch = nvme_qpair->ctrlr_ch;
1570 :
1571 99 : if (ctrlr_ch != NULL) {
1572 56 : if (ctrlr_ch->reset_iter != NULL) {
1573 : /* We are in a full reset sequence. */
1574 52 : if (ctrlr_ch->connect_poller != NULL) {
1575 : /* qpair was failed to connect. Abort the reset sequence. */
1576 0 : SPDK_DEBUGLOG(bdev_nvme, "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
1577 : qpair);
1578 0 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
1579 0 : status = -1;
1580 : } else {
1581 : /* qpair was completed to disconnect. Just move to the next ctrlr_channel. */
1582 52 : SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
1583 : qpair);
1584 52 : status = 0;
1585 : }
1586 52 : spdk_for_each_channel_continue(ctrlr_ch->reset_iter, status);
1587 52 : ctrlr_ch->reset_iter = NULL;
1588 : } else {
1589 : /* qpair was disconnected unexpectedly. Reset controller for recovery. */
1590 4 : SPDK_NOTICELOG("qpair %p was disconnected and freed. reset controller.\n", qpair);
1591 4 : bdev_nvme_failover_ctrlr(nvme_qpair->ctrlr);
1592 : }
1593 : } else {
1594 : /* In this case, ctrlr_channel is already deleted. */
1595 43 : SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed. delete nvme_qpair.\n", qpair);
1596 43 : nvme_qpair_delete(nvme_qpair);
1597 : }
1598 : }
1599 :
1600 : static void
1601 0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
1602 : {
1603 : struct nvme_qpair *nvme_qpair;
1604 :
1605 0 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1606 0 : if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
1607 0 : continue;
1608 : }
1609 :
1610 0 : if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
1611 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1612 0 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1613 : }
1614 : }
1615 0 : }
1616 :
1617 : static int
1618 1018 : bdev_nvme_poll(void *arg)
1619 : {
1620 1018 : struct nvme_poll_group *group = arg;
1621 : int64_t num_completions;
1622 :
1623 1018 : if (group->collect_spin_stat && group->start_ticks == 0) {
1624 0 : group->start_ticks = spdk_get_ticks();
1625 : }
1626 :
1627 1018 : num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
1628 : bdev_nvme_disconnected_qpair_cb);
1629 1018 : if (group->collect_spin_stat) {
1630 0 : if (num_completions > 0) {
1631 0 : if (group->end_ticks != 0) {
1632 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
1633 0 : group->end_ticks = 0;
1634 : }
1635 0 : group->start_ticks = 0;
1636 : } else {
1637 0 : group->end_ticks = spdk_get_ticks();
1638 : }
1639 : }
1640 :
1641 1018 : if (spdk_unlikely(num_completions < 0)) {
1642 0 : bdev_nvme_check_io_qpairs(group);
1643 : }
1644 :
1645 1018 : return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
1646 : }
1647 :
1648 : static int bdev_nvme_poll_adminq(void *arg);
1649 :
1650 : static void
1651 100 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
1652 : {
1653 100 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
1654 :
1655 100 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
1656 : nvme_ctrlr, new_period_us);
1657 100 : }
1658 :
1659 : static int
1660 146 : bdev_nvme_poll_adminq(void *arg)
1661 : {
1662 : int32_t rc;
1663 146 : struct nvme_ctrlr *nvme_ctrlr = arg;
1664 : nvme_ctrlr_disconnected_cb disconnected_cb;
1665 :
1666 146 : assert(nvme_ctrlr != NULL);
1667 :
1668 146 : rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
1669 146 : if (rc < 0) {
1670 53 : disconnected_cb = nvme_ctrlr->disconnected_cb;
1671 53 : nvme_ctrlr->disconnected_cb = NULL;
1672 :
1673 53 : if (disconnected_cb != NULL) {
1674 50 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
1675 : g_opts.nvme_adminq_poll_period_us);
1676 50 : disconnected_cb(nvme_ctrlr);
1677 : } else {
1678 3 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
1679 : }
1680 93 : } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
1681 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1682 0 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
1683 : }
1684 :
1685 146 : return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
1686 : }
1687 :
1688 : static void
1689 37 : nvme_bdev_free(void *io_device)
1690 : {
1691 37 : struct nvme_bdev *nvme_disk = io_device;
1692 :
1693 37 : pthread_mutex_destroy(&nvme_disk->mutex);
1694 37 : free(nvme_disk->disk.name);
1695 37 : free(nvme_disk->err_stat);
1696 37 : free(nvme_disk);
1697 37 : }
1698 :
1699 : static int
1700 36 : bdev_nvme_destruct(void *ctx)
1701 : {
1702 36 : struct nvme_bdev *nvme_disk = ctx;
1703 : struct nvme_ns *nvme_ns, *tmp_nvme_ns;
1704 :
1705 : SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);
1706 :
1707 73 : TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
1708 37 : pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
1709 :
1710 37 : nvme_ns->bdev = NULL;
1711 :
1712 37 : assert(nvme_ns->id > 0);
1713 :
1714 37 : if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
1715 0 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1716 :
1717 0 : nvme_ctrlr_release(nvme_ns->ctrlr);
1718 0 : nvme_ns_free(nvme_ns);
1719 : } else {
1720 37 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1721 : }
1722 : }
1723 :
1724 36 : pthread_mutex_lock(&g_bdev_nvme_mutex);
1725 36 : TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
1726 36 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
1727 :
1728 36 : spdk_io_device_unregister(nvme_disk, nvme_bdev_free);
1729 :
1730 36 : return 0;
1731 : }
1732 :
1733 : static int
1734 100 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
1735 : {
1736 : struct nvme_ctrlr *nvme_ctrlr;
1737 100 : struct spdk_nvme_io_qpair_opts opts;
1738 : struct spdk_nvme_qpair *qpair;
1739 : int rc;
1740 :
1741 100 : nvme_ctrlr = nvme_qpair->ctrlr;
1742 :
1743 100 : spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1744 100 : opts.delay_cmd_submit = g_opts.delay_cmd_submit;
1745 100 : opts.create_only = true;
1746 100 : opts.async_mode = true;
1747 100 : opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
1748 100 : g_opts.io_queue_requests = opts.io_queue_requests;
1749 :
1750 100 : qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1751 100 : if (qpair == NULL) {
1752 0 : return -1;
1753 : }
1754 :
1755 : SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
1756 : spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
1757 :
1758 100 : assert(nvme_qpair->group != NULL);
1759 :
1760 100 : rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
1761 100 : if (rc != 0) {
1762 0 : SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n");
1763 0 : goto err;
1764 : }
1765 :
1766 100 : rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
1767 100 : if (rc != 0) {
1768 0 : SPDK_ERRLOG("Unable to connect I/O qpair.\n");
1769 0 : goto err;
1770 : }
1771 :
1772 100 : nvme_qpair->qpair = qpair;
1773 :
1774 100 : if (!g_opts.disable_auto_failback) {
1775 71 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1776 : }
1777 :
1778 100 : return 0;
1779 :
1780 0 : err:
1781 0 : spdk_nvme_ctrlr_free_io_qpair(qpair);
1782 :
1783 0 : return rc;
1784 : }
1785 :
1786 : static void
1787 82 : bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
1788 : {
1789 82 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
1790 82 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
1791 82 : enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;
1792 : struct spdk_bdev_io *bdev_io;
1793 :
1794 82 : if (spdk_io_channel_iter_get_ctx(i) != NULL) {
1795 35 : status = SPDK_BDEV_IO_STATUS_FAILED;
1796 : }
1797 :
1798 85 : while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
1799 3 : bdev_io = TAILQ_FIRST(&ctrlr_ch->pending_resets);
1800 3 : TAILQ_REMOVE(&ctrlr_ch->pending_resets, bdev_io, module_link);
1801 3 : __bdev_nvme_io_complete(bdev_io, status, NULL);
1802 : }
1803 :
1804 82 : spdk_for_each_channel_continue(i, 0);
1805 82 : }
1806 :
1807 : /* This function marks the current trid as failed by storing the current ticks
1808 : * and then sets the next trid to the active trid within a controller if exists.
1809 : *
1810 : * The purpose of the boolean return value is to request the caller to disconnect
1811 : * the current trid now to try connecting the next trid.
1812 : */
1813 : static bool
1814 36 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
1815 : {
1816 : struct nvme_path_id *path_id, *next_path;
1817 : int rc __attribute__((unused));
1818 :
1819 36 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
1820 36 : assert(path_id);
1821 36 : assert(path_id == nvme_ctrlr->active_path_id);
1822 36 : next_path = TAILQ_NEXT(path_id, link);
1823 :
1824 : /* Update the last failed time. It means the trid is failed if its last
1825 : * failed time is non-zero.
1826 : */
1827 36 : path_id->last_failed_tsc = spdk_get_ticks();
1828 :
1829 36 : if (next_path == NULL) {
1830 : /* There is no alternate trid within a controller. */
1831 25 : return false;
1832 : }
1833 :
1834 11 : if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
1835 : /* Connect is not retried in a controller reset sequence. Connecting
1836 : * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
1837 : */
1838 3 : return false;
1839 : }
1840 :
1841 8 : assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
1842 :
1843 8 : SPDK_NOTICELOG("Start failover from %s:%s to %s:%s\n", path_id->trid.traddr,
1844 : path_id->trid.trsvcid, next_path->trid.traddr, next_path->trid.trsvcid);
1845 :
1846 8 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
1847 8 : nvme_ctrlr->active_path_id = next_path;
1848 8 : rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
1849 8 : assert(rc == 0);
1850 8 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
1851 8 : if (!remove) {
1852 : /** Shuffle the old trid to the end of the list and use the new one.
1853 : * Allows for round robin through multiple connections.
1854 : */
1855 6 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
1856 : } else {
1857 2 : free(path_id);
1858 : }
1859 :
1860 8 : if (start || next_path->last_failed_tsc == 0) {
1861 : /* bdev_nvme_failover_ctrlr() is just called or the next trid is not failed
1862 : * or used yet. Try the next trid now.
1863 : */
1864 7 : return true;
1865 : }
1866 :
1867 1 : if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
1868 1 : nvme_ctrlr->opts.reconnect_delay_sec) {
1869 : /* Enough backoff passed since the next trid failed. Try the next trid now. */
1870 0 : return true;
1871 : }
1872 :
1873 : /* The next trid will be tried after reconnect_delay_sec seconds. */
1874 1 : return false;
1875 : }
1876 :
1877 : static bool
1878 68 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
1879 : {
1880 : int32_t elapsed;
1881 :
1882 68 : if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
1883 36 : nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
1884 42 : return false;
1885 : }
1886 :
1887 26 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
1888 26 : if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
1889 6 : return true;
1890 : } else {
1891 20 : return false;
1892 : }
1893 : }
1894 :
1895 : static bool
1896 12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
1897 : {
1898 : uint32_t elapsed;
1899 :
1900 12 : if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
1901 8 : return false;
1902 : }
1903 :
1904 4 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
1905 4 : if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
1906 2 : return true;
1907 : } else {
1908 2 : return false;
1909 : }
1910 : }
1911 :
1912 : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
1913 :
1914 : static void
1915 51 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
1916 : {
1917 : int rc;
1918 :
1919 51 : rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
1920 51 : if (rc != 0) {
1921 : /* Disconnect fails if ctrlr is already resetting or removed. In this case,
1922 : * fail the reset sequence immediately.
1923 : */
1924 1 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
1925 1 : return;
1926 : }
1927 :
1928 : /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
1929 : * Set callback here to execute the specified operation after ctrlr is really disconnected.
1930 : */
1931 50 : assert(nvme_ctrlr->disconnected_cb == NULL);
1932 50 : nvme_ctrlr->disconnected_cb = cb_fn;
1933 :
1934 : /* During disconnection, reduce the period to poll adminq more often. */
1935 50 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
1936 : }
1937 :
1938 : enum bdev_nvme_op_after_reset {
1939 : OP_NONE,
1940 : OP_COMPLETE_PENDING_DESTRUCT,
1941 : OP_DESTRUCT,
1942 : OP_DELAYED_RECONNECT,
1943 : OP_FAILOVER,
1944 : };
1945 :
1946 : typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
1947 :
1948 : static _bdev_nvme_op_after_reset
1949 50 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
1950 : {
1951 50 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
1952 : /* Complete pending destruct after reset completes. */
1953 0 : return OP_COMPLETE_PENDING_DESTRUCT;
1954 50 : } else if (nvme_ctrlr->pending_failover) {
1955 3 : nvme_ctrlr->pending_failover = false;
1956 3 : nvme_ctrlr->reset_start_tsc = 0;
1957 3 : return OP_FAILOVER;
1958 47 : } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
1959 33 : nvme_ctrlr->reset_start_tsc = 0;
1960 33 : return OP_NONE;
1961 14 : } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
1962 2 : return OP_DESTRUCT;
1963 : } else {
1964 12 : if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
1965 2 : nvme_ctrlr->fast_io_fail_timedout = true;
1966 : }
1967 12 : return OP_DELAYED_RECONNECT;
1968 : }
1969 : }
1970 :
1971 : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
1972 : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
1973 :
1974 : static int
1975 9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
1976 : {
1977 9 : struct nvme_ctrlr *nvme_ctrlr = ctx;
1978 :
1979 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
1980 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1981 :
1982 9 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
1983 :
1984 9 : if (!nvme_ctrlr->reconnect_is_delayed) {
1985 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1986 0 : return SPDK_POLLER_BUSY;
1987 : }
1988 :
1989 9 : nvme_ctrlr->reconnect_is_delayed = false;
1990 :
1991 9 : if (nvme_ctrlr->destruct) {
1992 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1993 0 : return SPDK_POLLER_BUSY;
1994 : }
1995 :
1996 9 : assert(nvme_ctrlr->resetting == false);
1997 9 : nvme_ctrlr->resetting = true;
1998 :
1999 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2000 :
2001 9 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2002 :
2003 9 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2004 9 : return SPDK_POLLER_BUSY;
2005 : }
2006 :
2007 : static void
2008 12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
2009 : {
2010 12 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2011 :
2012 12 : assert(nvme_ctrlr->reconnect_is_delayed == false);
2013 12 : nvme_ctrlr->reconnect_is_delayed = true;
2014 :
2015 12 : assert(nvme_ctrlr->reconnect_delay_timer == NULL);
2016 12 : nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
2017 : nvme_ctrlr,
2018 : nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
2019 12 : }
2020 :
2021 : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
2022 :
2023 : static void
2024 48 : _bdev_nvme_reset_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
2025 : {
2026 48 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2027 48 : bool success = spdk_io_channel_iter_get_ctx(i) == NULL;
2028 48 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2029 48 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2030 : enum bdev_nvme_op_after_reset op_after_reset;
2031 :
2032 48 : assert(nvme_ctrlr->thread == spdk_get_thread());
2033 :
2034 48 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2035 48 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2036 :
2037 48 : if (!success) {
2038 21 : SPDK_ERRLOG("Resetting controller failed.\n");
2039 : } else {
2040 27 : SPDK_NOTICELOG("Resetting controller successful.\n");
2041 : }
2042 :
2043 48 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2044 48 : nvme_ctrlr->resetting = false;
2045 48 : nvme_ctrlr->dont_retry = false;
2046 48 : nvme_ctrlr->in_failover = false;
2047 :
2048 48 : op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
2049 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2050 :
2051 : /* Delay callbacks when the next operation is a failover. */
2052 48 : if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
2053 10 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
2054 : }
2055 :
2056 48 : switch (op_after_reset) {
2057 0 : case OP_COMPLETE_PENDING_DESTRUCT:
2058 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2059 0 : break;
2060 2 : case OP_DESTRUCT:
2061 2 : bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
2062 2 : remove_discovery_entry(nvme_ctrlr);
2063 2 : break;
2064 12 : case OP_DELAYED_RECONNECT:
2065 12 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
2066 12 : break;
2067 3 : case OP_FAILOVER:
2068 3 : nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
2069 3 : nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
2070 3 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
2071 3 : break;
2072 31 : default:
2073 31 : break;
2074 : }
2075 48 : }
2076 :
2077 : static void
2078 50 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
2079 : {
2080 50 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2081 50 : if (!success) {
2082 : /* Connecting the active trid failed. Set the next alternate trid to the
2083 : * active trid if it exists.
2084 : */
2085 23 : if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
2086 : /* The next alternate trid exists and is ready to try. Try it now. */
2087 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2088 :
2089 2 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2090 2 : return;
2091 : }
2092 :
2093 : /* We came here if there is no alternate trid or if the next trid exists but
2094 : * is not ready to try. We will try the active trid after reconnect_delay_sec
2095 : * seconds if it is non-zero or at the next reset call otherwise.
2096 : */
2097 : } else {
2098 : /* Connecting the active trid succeeded. Clear the last failed time because it
2099 : * means the trid is failed if its last failed time is non-zero.
2100 : */
2101 27 : nvme_ctrlr->active_path_id->last_failed_tsc = 0;
2102 : }
2103 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2104 :
2105 : /* Make sure we clear any pending resets before returning. */
2106 48 : spdk_for_each_channel(nvme_ctrlr,
2107 : bdev_nvme_complete_pending_resets,
2108 : success ? NULL : (void *)0x1,
2109 : _bdev_nvme_reset_ctrlr_complete);
2110 : }
2111 :
2112 : static void
2113 0 : bdev_nvme_reset_create_qpairs_failed(struct spdk_io_channel_iter *i, int status)
2114 : {
2115 0 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2116 :
2117 0 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2118 0 : }
2119 :
2120 : static void
2121 62 : bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
2122 : {
2123 62 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
2124 62 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
2125 : struct nvme_qpair *nvme_qpair;
2126 :
2127 62 : nvme_qpair = ctrlr_ch->qpair;
2128 62 : assert(nvme_qpair != NULL);
2129 :
2130 62 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
2131 :
2132 62 : if (nvme_qpair->qpair != NULL) {
2133 52 : if (nvme_qpair->ctrlr->dont_retry) {
2134 39 : spdk_nvme_qpair_set_abort_dnr(nvme_qpair->qpair, true);
2135 : }
2136 52 : spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
2137 :
2138 : /* The current full reset sequence will move to the next
2139 : * ctrlr_channel after the qpair is actually disconnected.
2140 : */
2141 52 : assert(ctrlr_ch->reset_iter == NULL);
2142 52 : ctrlr_ch->reset_iter = i;
2143 : } else {
2144 10 : spdk_for_each_channel_continue(i, 0);
2145 : }
2146 62 : }
2147 :
2148 : static void
2149 27 : bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
2150 : {
2151 27 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2152 :
2153 27 : if (status == 0) {
2154 27 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
2155 : } else {
2156 : /* Delete the added qpairs and quiesce ctrlr to make the states clean. */
2157 0 : spdk_for_each_channel(nvme_ctrlr,
2158 : bdev_nvme_reset_destroy_qpair,
2159 : NULL,
2160 : bdev_nvme_reset_create_qpairs_failed);
2161 : }
2162 27 : }
2163 :
2164 : static int
2165 43 : bdev_nvme_reset_check_qpair_connected(void *ctx)
2166 : {
2167 43 : struct nvme_ctrlr_channel *ctrlr_ch = ctx;
2168 :
2169 43 : if (ctrlr_ch->reset_iter == NULL) {
2170 : /* qpair was already failed to connect and the reset sequence is being aborted. */
2171 0 : assert(ctrlr_ch->connect_poller == NULL);
2172 0 : assert(ctrlr_ch->qpair->qpair == NULL);
2173 0 : return SPDK_POLLER_BUSY;
2174 : }
2175 :
2176 43 : assert(ctrlr_ch->qpair->qpair != NULL);
2177 :
2178 43 : if (!spdk_nvme_qpair_is_connected(ctrlr_ch->qpair->qpair)) {
2179 0 : return SPDK_POLLER_BUSY;
2180 : }
2181 :
2182 43 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
2183 :
2184 : /* qpair was completed to connect. Move to the next ctrlr_channel */
2185 43 : spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
2186 43 : ctrlr_ch->reset_iter = NULL;
2187 :
2188 43 : if (!g_opts.disable_auto_failback) {
2189 30 : _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
2190 : }
2191 :
2192 43 : return SPDK_POLLER_BUSY;
2193 : }
2194 :
2195 : static void
2196 43 : bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
2197 : {
2198 43 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
2199 43 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
2200 : int rc;
2201 :
2202 43 : rc = bdev_nvme_create_qpair(ctrlr_ch->qpair);
2203 43 : if (rc == 0) {
2204 43 : ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
2205 : ctrlr_ch, 0);
2206 :
2207 : /* The current full reset sequence will move to the next
2208 : * ctrlr_channel after the qpair is actually connected.
2209 : */
2210 43 : assert(ctrlr_ch->reset_iter == NULL);
2211 43 : ctrlr_ch->reset_iter = i;
2212 : } else {
2213 0 : spdk_for_each_channel_continue(i, rc);
2214 : }
2215 43 : }
2216 :
2217 : static void
2218 27 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
2219 : {
2220 27 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
2221 : struct nvme_ns *nvme_ns;
2222 :
2223 27 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
2224 39 : nvme_ns != NULL;
2225 12 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
2226 12 : if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
2227 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
2228 : /* NS can be added again. Just nullify nvme_ns->ns. */
2229 1 : nvme_ns->ns = NULL;
2230 : }
2231 : }
2232 27 : }
2233 :
2234 :
2235 : static int
2236 49 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
2237 : {
2238 49 : struct nvme_ctrlr *nvme_ctrlr = arg;
2239 49 : int rc = -ETIMEDOUT;
2240 :
2241 49 : if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
2242 : /* Mark the ctrlr as failed. The next call to
2243 : * spdk_nvme_ctrlr_reconnect_poll_async() will then
2244 : * do the necessary cleanup and return failure.
2245 : */
2246 2 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
2247 : }
2248 :
2249 49 : rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
2250 49 : if (rc == -EAGAIN) {
2251 0 : return SPDK_POLLER_BUSY;
2252 : }
2253 :
2254 49 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
2255 49 : if (rc == 0) {
2256 27 : nvme_ctrlr_check_namespaces(nvme_ctrlr);
2257 :
2258 : /* Recreate all of the I/O queue pairs */
2259 27 : spdk_for_each_channel(nvme_ctrlr,
2260 : bdev_nvme_reset_create_qpair,
2261 : NULL,
2262 : bdev_nvme_reset_create_qpairs_done);
2263 : } else {
2264 22 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2265 : }
2266 49 : return SPDK_POLLER_BUSY;
2267 : }
2268 :
2269 : static void
2270 49 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2271 : {
2272 49 : spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
2273 :
2274 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
2275 49 : assert(nvme_ctrlr->reset_detach_poller == NULL);
2276 49 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
2277 : nvme_ctrlr, 0);
2278 49 : }
2279 :
2280 : static void
2281 36 : bdev_nvme_reset_destroy_qpair_done(struct spdk_io_channel_iter *i, int status)
2282 : {
2283 36 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2284 :
2285 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
2286 36 : assert(status == 0);
2287 :
2288 36 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2289 0 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2290 : } else {
2291 36 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2292 : }
2293 36 : }
2294 :
2295 : static void
2296 36 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2297 : {
2298 36 : spdk_for_each_channel(nvme_ctrlr,
2299 : bdev_nvme_reset_destroy_qpair,
2300 : NULL,
2301 : bdev_nvme_reset_destroy_qpair_done);
2302 36 : }
2303 :
2304 : static void
2305 3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
2306 : {
2307 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2308 :
2309 3 : assert(nvme_ctrlr->resetting == true);
2310 3 : assert(nvme_ctrlr->thread == spdk_get_thread());
2311 :
2312 3 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2313 :
2314 3 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2315 :
2316 3 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2317 3 : }
2318 :
2319 : static void
2320 36 : _bdev_nvme_reset_ctrlr(void *ctx)
2321 : {
2322 36 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2323 :
2324 36 : assert(nvme_ctrlr->resetting == true);
2325 36 : assert(nvme_ctrlr->thread == spdk_get_thread());
2326 :
2327 36 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2328 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
2329 : } else {
2330 36 : bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
2331 : }
2332 36 : }
2333 :
2334 : static int
2335 33 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2336 : {
2337 : spdk_msg_fn msg_fn;
2338 :
2339 33 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2340 33 : if (nvme_ctrlr->destruct) {
2341 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2342 3 : return -ENXIO;
2343 : }
2344 :
2345 30 : if (nvme_ctrlr->resetting) {
2346 5 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2347 5 : SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
2348 5 : return -EBUSY;
2349 : }
2350 :
2351 25 : if (nvme_ctrlr->disabled) {
2352 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2353 0 : SPDK_NOTICELOG("Unable to perform reset. Controller is disabled.\n");
2354 0 : return -EALREADY;
2355 : }
2356 :
2357 25 : nvme_ctrlr->resetting = true;
2358 25 : nvme_ctrlr->dont_retry = true;
2359 :
2360 25 : if (nvme_ctrlr->reconnect_is_delayed) {
2361 1 : SPDK_DEBUGLOG(bdev_nvme, "Reconnect is already scheduled.\n");
2362 1 : msg_fn = bdev_nvme_reconnect_ctrlr_now;
2363 1 : nvme_ctrlr->reconnect_is_delayed = false;
2364 : } else {
2365 24 : msg_fn = _bdev_nvme_reset_ctrlr;
2366 24 : assert(nvme_ctrlr->reset_start_tsc == 0);
2367 : }
2368 :
2369 25 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2370 :
2371 25 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2372 :
2373 25 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2374 25 : return 0;
2375 : }
2376 :
2377 : static int
2378 3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2379 : {
2380 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2381 3 : if (nvme_ctrlr->destruct) {
2382 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2383 0 : return -ENXIO;
2384 : }
2385 :
2386 3 : if (nvme_ctrlr->resetting) {
2387 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2388 0 : return -EBUSY;
2389 : }
2390 :
2391 3 : if (!nvme_ctrlr->disabled) {
2392 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2393 1 : return -EALREADY;
2394 : }
2395 :
2396 2 : nvme_ctrlr->disabled = false;
2397 2 : nvme_ctrlr->resetting = true;
2398 :
2399 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2400 :
2401 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2402 :
2403 2 : spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
2404 2 : return 0;
2405 : }
2406 :
2407 : static void
2408 2 : _bdev_nvme_disable_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
2409 : {
2410 2 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2411 2 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2412 2 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2413 : enum bdev_nvme_op_after_reset op_after_disable;
2414 :
2415 2 : assert(nvme_ctrlr->thread == spdk_get_thread());
2416 :
2417 2 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2418 2 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2419 :
2420 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2421 :
2422 2 : nvme_ctrlr->resetting = false;
2423 2 : nvme_ctrlr->dont_retry = false;
2424 :
2425 2 : op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
2426 :
2427 2 : nvme_ctrlr->disabled = true;
2428 2 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2429 :
2430 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2431 :
2432 2 : if (ctrlr_op_cb_fn) {
2433 0 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
2434 : }
2435 :
2436 2 : switch (op_after_disable) {
2437 0 : case OP_COMPLETE_PENDING_DESTRUCT:
2438 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2439 0 : break;
2440 2 : default:
2441 2 : break;
2442 : }
2443 :
2444 2 : }
2445 :
2446 : static void
2447 2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
2448 : {
2449 : /* Make sure we clear any pending resets before returning. */
2450 2 : spdk_for_each_channel(nvme_ctrlr,
2451 : bdev_nvme_complete_pending_resets,
2452 : NULL,
2453 : _bdev_nvme_disable_ctrlr_complete);
2454 2 : }
2455 :
2456 : static void
2457 1 : bdev_nvme_disable_destroy_qpairs_done(struct spdk_io_channel_iter *i, int status)
2458 : {
2459 1 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2460 :
2461 1 : assert(status == 0);
2462 :
2463 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2464 0 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2465 : } else {
2466 1 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
2467 : }
2468 1 : }
2469 :
2470 : static void
2471 1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2472 : {
2473 1 : spdk_for_each_channel(nvme_ctrlr,
2474 : bdev_nvme_reset_destroy_qpair,
2475 : NULL,
2476 : bdev_nvme_disable_destroy_qpairs_done);
2477 1 : }
2478 :
2479 : static void
2480 1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
2481 : {
2482 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2483 :
2484 1 : assert(nvme_ctrlr->resetting == true);
2485 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2486 :
2487 1 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2488 :
2489 1 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2490 1 : }
2491 :
2492 : static void
2493 1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
2494 : {
2495 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2496 :
2497 1 : assert(nvme_ctrlr->resetting == true);
2498 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2499 :
2500 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2501 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
2502 : } else {
2503 1 : bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
2504 : }
2505 1 : }
2506 :
2507 : static int
2508 5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2509 : {
2510 : spdk_msg_fn msg_fn;
2511 :
2512 5 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2513 5 : if (nvme_ctrlr->destruct) {
2514 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2515 1 : return -ENXIO;
2516 : }
2517 :
2518 4 : if (nvme_ctrlr->resetting) {
2519 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2520 1 : return -EBUSY;
2521 : }
2522 :
2523 3 : if (nvme_ctrlr->disabled) {
2524 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2525 1 : return -EALREADY;
2526 : }
2527 :
2528 2 : nvme_ctrlr->resetting = true;
2529 2 : nvme_ctrlr->dont_retry = true;
2530 :
2531 2 : if (nvme_ctrlr->reconnect_is_delayed) {
2532 1 : msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
2533 1 : nvme_ctrlr->reconnect_is_delayed = false;
2534 : } else {
2535 1 : msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
2536 : }
2537 :
2538 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2539 :
2540 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2541 :
2542 2 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2543 2 : return 0;
2544 : }
2545 :
2546 : static int
2547 15 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2548 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2549 : {
2550 : int rc;
2551 :
2552 15 : switch (op) {
2553 14 : case NVME_CTRLR_OP_RESET:
2554 14 : rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
2555 14 : break;
2556 0 : case NVME_CTRLR_OP_ENABLE:
2557 0 : rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
2558 0 : break;
2559 0 : case NVME_CTRLR_OP_DISABLE:
2560 0 : rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
2561 0 : break;
2562 1 : default:
2563 1 : rc = -EINVAL;
2564 1 : break;
2565 : }
2566 :
2567 15 : if (rc == 0) {
2568 9 : assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
2569 9 : assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
2570 9 : nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
2571 9 : nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
2572 : }
2573 15 : return rc;
2574 : }
2575 :
2576 : struct nvme_ctrlr_op_rpc_ctx {
2577 : struct nvme_ctrlr *nvme_ctrlr;
2578 : struct spdk_thread *orig_thread;
2579 : enum nvme_ctrlr_op op;
2580 : int rc;
2581 : bdev_nvme_ctrlr_op_cb cb_fn;
2582 : void *cb_arg;
2583 : };
2584 :
2585 : static void
2586 4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
2587 : {
2588 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2589 :
2590 4 : assert(ctx != NULL);
2591 4 : assert(ctx->cb_fn != NULL);
2592 :
2593 4 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2594 :
2595 4 : free(ctx);
2596 4 : }
2597 :
2598 : static void
2599 4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
2600 : {
2601 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2602 :
2603 4 : ctx->rc = rc;
2604 :
2605 4 : spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
2606 4 : }
2607 :
2608 : void
2609 4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2610 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2611 : {
2612 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2613 : int rc;
2614 :
2615 4 : assert(cb_fn != NULL);
2616 :
2617 4 : ctx = calloc(1, sizeof(*ctx));
2618 4 : if (ctx == NULL) {
2619 0 : SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2620 0 : cb_fn(cb_arg, -ENOMEM);
2621 0 : return;
2622 : }
2623 :
2624 4 : ctx->orig_thread = spdk_get_thread();
2625 4 : ctx->cb_fn = cb_fn;
2626 4 : ctx->cb_arg = cb_arg;
2627 :
2628 4 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
2629 4 : if (rc == 0) {
2630 1 : return;
2631 3 : } else if (rc == -EALREADY) {
2632 0 : rc = 0;
2633 : }
2634 :
2635 3 : nvme_ctrlr_op_rpc_complete(ctx, rc);
2636 : }
2637 :
2638 : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
2639 :
2640 : static void
2641 2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
2642 : {
2643 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2644 : struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
2645 : int rc;
2646 :
2647 2 : prev_nvme_ctrlr = ctx->nvme_ctrlr;
2648 2 : ctx->nvme_ctrlr = NULL;
2649 :
2650 2 : if (ctx->rc != 0) {
2651 0 : goto complete;
2652 : }
2653 :
2654 2 : next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
2655 2 : if (next_nvme_ctrlr == NULL) {
2656 1 : goto complete;
2657 : }
2658 :
2659 1 : rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2660 1 : if (rc == 0) {
2661 1 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2662 1 : return;
2663 0 : } else if (rc == -EALREADY) {
2664 0 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2665 0 : rc = 0;
2666 : }
2667 :
2668 0 : ctx->rc = rc;
2669 :
2670 1 : complete:
2671 1 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2672 1 : free(ctx);
2673 : }
2674 :
2675 : static void
2676 2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
2677 : {
2678 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2679 :
2680 2 : ctx->rc = rc;
2681 :
2682 2 : spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
2683 2 : }
2684 :
2685 : void
2686 1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
2687 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2688 : {
2689 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2690 : struct nvme_ctrlr *nvme_ctrlr;
2691 : int rc;
2692 :
2693 1 : assert(cb_fn != NULL);
2694 :
2695 1 : ctx = calloc(1, sizeof(*ctx));
2696 1 : if (ctx == NULL) {
2697 0 : SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2698 0 : cb_fn(cb_arg, -ENOMEM);
2699 0 : return;
2700 : }
2701 :
2702 1 : ctx->orig_thread = spdk_get_thread();
2703 1 : ctx->op = op;
2704 1 : ctx->cb_fn = cb_fn;
2705 1 : ctx->cb_arg = cb_arg;
2706 :
2707 1 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
2708 1 : assert(nvme_ctrlr != NULL);
2709 :
2710 1 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2711 1 : if (rc == 0) {
2712 1 : ctx->nvme_ctrlr = nvme_ctrlr;
2713 1 : return;
2714 0 : } else if (rc == -EALREADY) {
2715 0 : ctx->nvme_ctrlr = nvme_ctrlr;
2716 0 : rc = 0;
2717 : }
2718 :
2719 0 : nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
2720 : }
2721 :
2722 : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
2723 :
2724 : static void
2725 4 : _bdev_nvme_reset_io_complete(struct spdk_io_channel_iter *i, int status)
2726 : {
2727 4 : struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
2728 : enum spdk_bdev_io_status io_status;
2729 :
2730 4 : if (bio->cpl.cdw0 == 0) {
2731 3 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
2732 : } else {
2733 1 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
2734 : }
2735 :
2736 4 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
2737 4 : }
2738 :
2739 : static void
2740 8 : bdev_nvme_abort_bdev_channel(struct spdk_io_channel_iter *i)
2741 : {
2742 8 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
2743 8 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
2744 :
2745 8 : bdev_nvme_abort_retry_ios(nbdev_ch);
2746 :
2747 8 : spdk_for_each_channel_continue(i, 0);
2748 8 : }
2749 :
2750 : static void
2751 4 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
2752 : {
2753 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
2754 4 : struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
2755 :
2756 : /* Abort all queued I/Os for retry. */
2757 4 : spdk_for_each_channel(nbdev,
2758 : bdev_nvme_abort_bdev_channel,
2759 : bio,
2760 : _bdev_nvme_reset_io_complete);
2761 4 : }
2762 :
2763 : static void
2764 6 : _bdev_nvme_reset_io_continue(void *ctx)
2765 : {
2766 6 : struct nvme_bdev_io *bio = ctx;
2767 : struct nvme_io_path *prev_io_path, *next_io_path;
2768 : int rc;
2769 :
2770 6 : prev_io_path = bio->io_path;
2771 6 : bio->io_path = NULL;
2772 :
2773 6 : if (bio->cpl.cdw0 != 0) {
2774 1 : goto complete;
2775 : }
2776 :
2777 5 : next_io_path = STAILQ_NEXT(prev_io_path, stailq);
2778 5 : if (next_io_path == NULL) {
2779 3 : goto complete;
2780 : }
2781 :
2782 2 : rc = _bdev_nvme_reset_io(next_io_path, bio);
2783 2 : if (rc == 0) {
2784 2 : return;
2785 : }
2786 :
2787 0 : bio->cpl.cdw0 = 1;
2788 :
2789 4 : complete:
2790 4 : bdev_nvme_reset_io_complete(bio);
2791 : }
2792 :
2793 : static void
2794 6 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
2795 : {
2796 6 : struct nvme_bdev_io *bio = cb_arg;
2797 6 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
2798 :
2799 6 : bio->cpl.cdw0 = (rc == 0) ? 0 : 1;
2800 :
2801 6 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
2802 6 : }
2803 :
2804 : static int
2805 9 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
2806 : {
2807 : struct nvme_ctrlr_channel *ctrlr_ch;
2808 : struct spdk_bdev_io *bdev_io;
2809 : int rc;
2810 :
2811 9 : rc = nvme_ctrlr_op(io_path->qpair->ctrlr, NVME_CTRLR_OP_RESET,
2812 : bdev_nvme_reset_io_continue, bio);
2813 9 : if (rc == 0) {
2814 6 : assert(bio->io_path == NULL);
2815 6 : bio->io_path = io_path;
2816 3 : } else if (rc == -EBUSY) {
2817 3 : ctrlr_ch = io_path->qpair->ctrlr_ch;
2818 3 : assert(ctrlr_ch != NULL);
2819 : /*
2820 : * Reset call is queued only if it is from the app framework. This is on purpose so that
2821 : * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
2822 : * upper level. If they are in the middle of a reset, we won't try to schedule another one.
2823 : */
2824 3 : bdev_io = spdk_bdev_io_from_ctx(bio);
2825 3 : TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bdev_io, module_link);
2826 3 : rc = 0;
2827 : }
2828 :
2829 9 : return rc;
2830 : }
2831 :
2832 : static void
2833 7 : bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio)
2834 : {
2835 : struct nvme_io_path *io_path;
2836 : int rc;
2837 :
2838 7 : bio->cpl.cdw0 = 0;
2839 :
2840 : /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
2841 7 : io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
2842 7 : assert(io_path != NULL);
2843 :
2844 7 : rc = _bdev_nvme_reset_io(io_path, bio);
2845 7 : if (rc != 0) {
2846 : /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
2847 0 : bdev_nvme_reset_io_continue(bio, rc == -EALREADY);
2848 : }
2849 7 : }
2850 :
2851 : static int
2852 18 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
2853 : {
2854 18 : if (nvme_ctrlr->destruct) {
2855 : /* Don't bother resetting if the controller is in the process of being destructed. */
2856 2 : return -ENXIO;
2857 : }
2858 :
2859 16 : if (nvme_ctrlr->resetting) {
2860 3 : if (!nvme_ctrlr->in_failover) {
2861 3 : SPDK_NOTICELOG("Reset is already in progress. Defer failover until reset completes.\n");
2862 :
2863 : /* Defer failover until reset completes. */
2864 3 : nvme_ctrlr->pending_failover = true;
2865 3 : return -EINPROGRESS;
2866 : } else {
2867 0 : SPDK_NOTICELOG("Unable to perform failover, already in progress.\n");
2868 0 : return -EBUSY;
2869 : }
2870 : }
2871 :
2872 13 : bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
2873 :
2874 13 : if (nvme_ctrlr->reconnect_is_delayed) {
2875 1 : SPDK_NOTICELOG("Reconnect is already scheduled.\n");
2876 :
2877 : /* We rely on the next reconnect for the failover. */
2878 1 : return -EALREADY;
2879 : }
2880 :
2881 12 : if (nvme_ctrlr->disabled) {
2882 0 : SPDK_NOTICELOG("Controller is disabled.\n");
2883 :
2884 : /* We rely on the enablement for the failover. */
2885 0 : return -EALREADY;
2886 : }
2887 :
2888 12 : nvme_ctrlr->resetting = true;
2889 12 : nvme_ctrlr->in_failover = true;
2890 :
2891 12 : assert(nvme_ctrlr->reset_start_tsc == 0);
2892 12 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2893 :
2894 12 : return 0;
2895 : }
2896 :
2897 : static int
2898 16 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2899 : {
2900 : int rc;
2901 :
2902 16 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2903 16 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
2904 16 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2905 :
2906 16 : if (rc == 0) {
2907 11 : spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
2908 5 : } else if (rc == -EALREADY) {
2909 0 : rc = 0;
2910 : }
2911 :
2912 16 : return rc;
2913 : }
2914 :
2915 : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
2916 : uint64_t num_blocks);
2917 :
2918 : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
2919 : uint64_t num_blocks);
2920 :
2921 : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
2922 : uint64_t src_offset_blocks,
2923 : uint64_t num_blocks);
2924 :
2925 : static void
2926 1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
2927 : bool success)
2928 : {
2929 1 : struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
2930 1 : struct spdk_bdev *bdev = bdev_io->bdev;
2931 : int ret;
2932 :
2933 1 : if (!success) {
2934 0 : ret = -EINVAL;
2935 0 : goto exit;
2936 : }
2937 :
2938 1 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
2939 0 : ret = -ENXIO;
2940 0 : goto exit;
2941 : }
2942 :
2943 1 : ret = bdev_nvme_readv(bio,
2944 : bdev_io->u.bdev.iovs,
2945 : bdev_io->u.bdev.iovcnt,
2946 : bdev_io->u.bdev.md_buf,
2947 : bdev_io->u.bdev.num_blocks,
2948 : bdev_io->u.bdev.offset_blocks,
2949 : bdev->dif_check_flags,
2950 : bdev_io->u.bdev.memory_domain,
2951 : bdev_io->u.bdev.memory_domain_ctx,
2952 : bdev_io->u.bdev.accel_sequence);
2953 :
2954 1 : exit:
2955 1 : if (spdk_unlikely(ret != 0)) {
2956 0 : bdev_nvme_io_complete(bio, ret);
2957 : }
2958 1 : }
2959 :
2960 : static inline void
2961 51 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
2962 : {
2963 51 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
2964 51 : struct spdk_bdev *bdev = bdev_io->bdev;
2965 : struct nvme_bdev_io *nbdev_io_to_abort;
2966 51 : int rc = 0;
2967 :
2968 51 : switch (bdev_io->type) {
2969 3 : case SPDK_BDEV_IO_TYPE_READ:
2970 3 : if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
2971 2 : rc = bdev_nvme_readv(nbdev_io,
2972 : bdev_io->u.bdev.iovs,
2973 : bdev_io->u.bdev.iovcnt,
2974 : bdev_io->u.bdev.md_buf,
2975 : bdev_io->u.bdev.num_blocks,
2976 : bdev_io->u.bdev.offset_blocks,
2977 : bdev->dif_check_flags,
2978 : bdev_io->u.bdev.memory_domain,
2979 : bdev_io->u.bdev.memory_domain_ctx,
2980 : bdev_io->u.bdev.accel_sequence);
2981 : } else {
2982 1 : spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
2983 1 : bdev_io->u.bdev.num_blocks * bdev->blocklen);
2984 1 : rc = 0;
2985 : }
2986 3 : break;
2987 25 : case SPDK_BDEV_IO_TYPE_WRITE:
2988 25 : rc = bdev_nvme_writev(nbdev_io,
2989 : bdev_io->u.bdev.iovs,
2990 : bdev_io->u.bdev.iovcnt,
2991 : bdev_io->u.bdev.md_buf,
2992 : bdev_io->u.bdev.num_blocks,
2993 : bdev_io->u.bdev.offset_blocks,
2994 : bdev->dif_check_flags,
2995 : bdev_io->u.bdev.memory_domain,
2996 : bdev_io->u.bdev.memory_domain_ctx,
2997 : bdev_io->u.bdev.accel_sequence);
2998 25 : break;
2999 1 : case SPDK_BDEV_IO_TYPE_COMPARE:
3000 1 : rc = bdev_nvme_comparev(nbdev_io,
3001 : bdev_io->u.bdev.iovs,
3002 : bdev_io->u.bdev.iovcnt,
3003 : bdev_io->u.bdev.md_buf,
3004 : bdev_io->u.bdev.num_blocks,
3005 : bdev_io->u.bdev.offset_blocks,
3006 : bdev->dif_check_flags);
3007 1 : break;
3008 2 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3009 2 : rc = bdev_nvme_comparev_and_writev(nbdev_io,
3010 : bdev_io->u.bdev.iovs,
3011 : bdev_io->u.bdev.iovcnt,
3012 : bdev_io->u.bdev.fused_iovs,
3013 : bdev_io->u.bdev.fused_iovcnt,
3014 : bdev_io->u.bdev.md_buf,
3015 : bdev_io->u.bdev.num_blocks,
3016 : bdev_io->u.bdev.offset_blocks,
3017 : bdev->dif_check_flags);
3018 2 : break;
3019 1 : case SPDK_BDEV_IO_TYPE_UNMAP:
3020 1 : rc = bdev_nvme_unmap(nbdev_io,
3021 : bdev_io->u.bdev.offset_blocks,
3022 : bdev_io->u.bdev.num_blocks);
3023 1 : break;
3024 0 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3025 0 : rc = bdev_nvme_write_zeroes(nbdev_io,
3026 : bdev_io->u.bdev.offset_blocks,
3027 : bdev_io->u.bdev.num_blocks);
3028 0 : break;
3029 7 : case SPDK_BDEV_IO_TYPE_RESET:
3030 7 : nbdev_io->io_path = NULL;
3031 7 : bdev_nvme_reset_io(nbdev_ch, nbdev_io);
3032 7 : return;
3033 :
3034 1 : case SPDK_BDEV_IO_TYPE_FLUSH:
3035 1 : bdev_nvme_io_complete(nbdev_io, 0);
3036 1 : return;
3037 :
3038 0 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3039 0 : rc = bdev_nvme_zone_appendv(nbdev_io,
3040 : bdev_io->u.bdev.iovs,
3041 : bdev_io->u.bdev.iovcnt,
3042 : bdev_io->u.bdev.md_buf,
3043 : bdev_io->u.bdev.num_blocks,
3044 : bdev_io->u.bdev.offset_blocks,
3045 : bdev->dif_check_flags);
3046 0 : break;
3047 0 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3048 0 : rc = bdev_nvme_get_zone_info(nbdev_io,
3049 : bdev_io->u.zone_mgmt.zone_id,
3050 : bdev_io->u.zone_mgmt.num_zones,
3051 0 : bdev_io->u.zone_mgmt.buf);
3052 0 : break;
3053 0 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3054 0 : rc = bdev_nvme_zone_management(nbdev_io,
3055 : bdev_io->u.zone_mgmt.zone_id,
3056 : bdev_io->u.zone_mgmt.zone_action);
3057 0 : break;
3058 5 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3059 5 : nbdev_io->io_path = NULL;
3060 5 : bdev_nvme_admin_passthru(nbdev_ch,
3061 : nbdev_io,
3062 : &bdev_io->u.nvme_passthru.cmd,
3063 : bdev_io->u.nvme_passthru.buf,
3064 : bdev_io->u.nvme_passthru.nbytes);
3065 5 : return;
3066 :
3067 0 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3068 0 : rc = bdev_nvme_io_passthru(nbdev_io,
3069 : &bdev_io->u.nvme_passthru.cmd,
3070 : bdev_io->u.nvme_passthru.buf,
3071 : bdev_io->u.nvme_passthru.nbytes);
3072 0 : break;
3073 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3074 0 : rc = bdev_nvme_io_passthru_md(nbdev_io,
3075 : &bdev_io->u.nvme_passthru.cmd,
3076 : bdev_io->u.nvme_passthru.buf,
3077 : bdev_io->u.nvme_passthru.nbytes,
3078 : bdev_io->u.nvme_passthru.md_buf,
3079 : bdev_io->u.nvme_passthru.md_len);
3080 0 : break;
3081 0 : case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
3082 0 : rc = bdev_nvme_iov_passthru_md(nbdev_io,
3083 : &bdev_io->u.nvme_passthru.cmd,
3084 : bdev_io->u.nvme_passthru.iovs,
3085 : bdev_io->u.nvme_passthru.iovcnt,
3086 : bdev_io->u.nvme_passthru.nbytes,
3087 : bdev_io->u.nvme_passthru.md_buf,
3088 : bdev_io->u.nvme_passthru.md_len);
3089 0 : break;
3090 6 : case SPDK_BDEV_IO_TYPE_ABORT:
3091 6 : nbdev_io->io_path = NULL;
3092 6 : nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
3093 6 : bdev_nvme_abort(nbdev_ch,
3094 : nbdev_io,
3095 : nbdev_io_to_abort);
3096 6 : return;
3097 :
3098 0 : case SPDK_BDEV_IO_TYPE_COPY:
3099 0 : rc = bdev_nvme_copy(nbdev_io,
3100 : bdev_io->u.bdev.offset_blocks,
3101 : bdev_io->u.bdev.copy.src_offset_blocks,
3102 : bdev_io->u.bdev.num_blocks);
3103 0 : break;
3104 0 : default:
3105 0 : rc = -EINVAL;
3106 0 : break;
3107 : }
3108 :
3109 32 : if (spdk_unlikely(rc != 0)) {
3110 0 : bdev_nvme_io_complete(nbdev_io, rc);
3111 : }
3112 : }
3113 :
3114 : static void
3115 58 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
3116 : {
3117 58 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
3118 58 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3119 :
3120 58 : if (spdk_likely(nbdev_io->submit_tsc == 0)) {
3121 58 : nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
3122 : } else {
3123 : /* There are cases where submit_tsc != 0, i.e. retry I/O.
3124 : * We need to update submit_tsc here.
3125 : */
3126 0 : nbdev_io->submit_tsc = spdk_get_ticks();
3127 : }
3128 :
3129 58 : spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
3130 58 : nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
3131 58 : if (spdk_unlikely(!nbdev_io->io_path)) {
3132 11 : if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
3133 10 : bdev_nvme_io_complete(nbdev_io, -ENXIO);
3134 10 : return;
3135 : }
3136 :
3137 : /* Admin commands do not use the optimal I/O path.
3138 : * Simply fall through even if it is not found.
3139 : */
3140 : }
3141 :
3142 48 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
3143 : }
3144 :
3145 : static bool
3146 0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
3147 : {
3148 0 : struct nvme_bdev *nbdev = ctx;
3149 : struct nvme_ns *nvme_ns;
3150 : struct spdk_nvme_ns *ns;
3151 : struct spdk_nvme_ctrlr *ctrlr;
3152 : const struct spdk_nvme_ctrlr_data *cdata;
3153 :
3154 0 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
3155 0 : assert(nvme_ns != NULL);
3156 0 : ns = nvme_ns->ns;
3157 0 : if (ns == NULL) {
3158 0 : return false;
3159 : }
3160 :
3161 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3162 :
3163 0 : switch (io_type) {
3164 0 : case SPDK_BDEV_IO_TYPE_READ:
3165 : case SPDK_BDEV_IO_TYPE_WRITE:
3166 : case SPDK_BDEV_IO_TYPE_RESET:
3167 : case SPDK_BDEV_IO_TYPE_FLUSH:
3168 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3169 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3170 : case SPDK_BDEV_IO_TYPE_ABORT:
3171 0 : return true;
3172 :
3173 0 : case SPDK_BDEV_IO_TYPE_COMPARE:
3174 0 : return spdk_nvme_ns_supports_compare(ns);
3175 :
3176 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3177 0 : return spdk_nvme_ns_get_md_size(ns) ? true : false;
3178 :
3179 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
3180 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3181 0 : return cdata->oncs.dsm;
3182 :
3183 0 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3184 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3185 0 : return cdata->oncs.write_zeroes;
3186 :
3187 0 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3188 0 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
3189 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
3190 0 : return true;
3191 : }
3192 0 : return false;
3193 :
3194 0 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3195 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3196 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
3197 :
3198 0 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3199 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
3200 0 : spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
3201 :
3202 0 : case SPDK_BDEV_IO_TYPE_COPY:
3203 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3204 0 : return cdata->oncs.copy;
3205 :
3206 0 : default:
3207 0 : return false;
3208 : }
3209 : }
3210 :
3211 : static int
3212 57 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
3213 : {
3214 : struct nvme_qpair *nvme_qpair;
3215 : struct spdk_io_channel *pg_ch;
3216 : int rc;
3217 :
3218 57 : nvme_qpair = calloc(1, sizeof(*nvme_qpair));
3219 57 : if (!nvme_qpair) {
3220 0 : SPDK_ERRLOG("Failed to alloc nvme_qpair.\n");
3221 0 : return -1;
3222 : }
3223 :
3224 57 : TAILQ_INIT(&nvme_qpair->io_path_list);
3225 :
3226 57 : nvme_qpair->ctrlr = nvme_ctrlr;
3227 57 : nvme_qpair->ctrlr_ch = ctrlr_ch;
3228 :
3229 57 : pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
3230 57 : if (!pg_ch) {
3231 0 : free(nvme_qpair);
3232 0 : return -1;
3233 : }
3234 :
3235 57 : nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
3236 :
3237 : #ifdef SPDK_CONFIG_VTUNE
3238 : nvme_qpair->group->collect_spin_stat = true;
3239 : #else
3240 57 : nvme_qpair->group->collect_spin_stat = false;
3241 : #endif
3242 :
3243 57 : if (!nvme_ctrlr->disabled) {
3244 : /* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will
3245 : * be created when it's enabled.
3246 : */
3247 57 : rc = bdev_nvme_create_qpair(nvme_qpair);
3248 57 : if (rc != 0) {
3249 : /* nvme_ctrlr can't create IO qpair if connection is down.
3250 : * If reconnect_delay_sec is non-zero, creating IO qpair is retried
3251 : * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
3252 : * submitted IO will be queued until IO qpair is successfully created.
3253 : *
3254 : * Hence, if both are satisfied, ignore the failure.
3255 : */
3256 0 : if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
3257 0 : spdk_put_io_channel(pg_ch);
3258 0 : free(nvme_qpair);
3259 0 : return rc;
3260 : }
3261 : }
3262 : }
3263 :
3264 57 : TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3265 :
3266 57 : ctrlr_ch->qpair = nvme_qpair;
3267 :
3268 57 : pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
3269 57 : nvme_qpair->ctrlr->ref++;
3270 57 : pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);
3271 :
3272 57 : return 0;
3273 : }
3274 :
3275 : static int
3276 57 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3277 : {
3278 57 : struct nvme_ctrlr *nvme_ctrlr = io_device;
3279 57 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3280 :
3281 57 : TAILQ_INIT(&ctrlr_ch->pending_resets);
3282 :
3283 57 : return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
3284 : }
3285 :
3286 : static void
3287 57 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
3288 : {
3289 : struct nvme_io_path *io_path, *next;
3290 :
3291 57 : assert(nvme_qpair->group != NULL);
3292 :
3293 92 : TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
3294 35 : TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
3295 35 : nvme_io_path_free(io_path);
3296 : }
3297 :
3298 57 : TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3299 :
3300 57 : spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
3301 :
3302 57 : nvme_ctrlr_release(nvme_qpair->ctrlr);
3303 :
3304 57 : free(nvme_qpair);
3305 57 : }
3306 :
3307 : static void
3308 57 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3309 : {
3310 57 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3311 : struct nvme_qpair *nvme_qpair;
3312 :
3313 57 : nvme_qpair = ctrlr_ch->qpair;
3314 57 : assert(nvme_qpair != NULL);
3315 :
3316 57 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
3317 :
3318 57 : if (nvme_qpair->qpair != NULL) {
3319 43 : if (ctrlr_ch->reset_iter == NULL) {
3320 43 : spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
3321 : } else {
3322 : /* Skip current ctrlr_channel in a full reset sequence because
3323 : * it is being deleted now. The qpair is already being disconnected.
3324 : * We do not have to restart disconnecting it.
3325 : */
3326 0 : spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
3327 : }
3328 :
3329 : /* We cannot release a reference to the poll group now.
3330 : * The qpair may be disconnected asynchronously later.
3331 : * We need to poll it until it is actually disconnected.
3332 : * Just detach the qpair from the deleting ctrlr_channel.
3333 : */
3334 43 : nvme_qpair->ctrlr_ch = NULL;
3335 : } else {
3336 14 : assert(ctrlr_ch->reset_iter == NULL);
3337 :
3338 14 : nvme_qpair_delete(nvme_qpair);
3339 : }
3340 57 : }
3341 :
3342 : static inline struct spdk_io_channel *
3343 0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
3344 : {
3345 0 : if (spdk_unlikely(!group->accel_channel)) {
3346 0 : group->accel_channel = spdk_accel_get_io_channel();
3347 0 : if (!group->accel_channel) {
3348 0 : SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
3349 : group);
3350 0 : return NULL;
3351 : }
3352 : }
3353 :
3354 0 : return group->accel_channel;
3355 : }
3356 :
3357 : static void
3358 0 : bdev_nvme_submit_accel_crc32c(void *ctx, uint32_t *dst, struct iovec *iov,
3359 : uint32_t iov_cnt, uint32_t seed,
3360 : spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
3361 : {
3362 : struct spdk_io_channel *accel_ch;
3363 0 : struct nvme_poll_group *group = ctx;
3364 : int rc;
3365 :
3366 0 : assert(cb_fn != NULL);
3367 :
3368 0 : accel_ch = bdev_nvme_get_accel_channel(group);
3369 0 : if (spdk_unlikely(accel_ch == NULL)) {
3370 0 : cb_fn(cb_arg, -ENOMEM);
3371 0 : return;
3372 : }
3373 :
3374 0 : rc = spdk_accel_submit_crc32cv(accel_ch, dst, iov, iov_cnt, seed, cb_fn, cb_arg);
3375 0 : if (rc) {
3376 : /* For the two cases, spdk_accel_submit_crc32cv does not call the user's cb_fn */
3377 0 : if (rc == -ENOMEM || rc == -EINVAL) {
3378 0 : cb_fn(cb_arg, rc);
3379 : }
3380 0 : SPDK_ERRLOG("Cannot complete the accelerated crc32c operation with iov=%p\n", iov);
3381 : }
3382 : }
3383 :
3384 : static void
3385 0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
3386 : {
3387 0 : spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
3388 0 : }
3389 :
3390 : static void
3391 0 : bdev_nvme_abort_sequence(void *seq)
3392 : {
3393 0 : spdk_accel_sequence_abort(seq);
3394 0 : }
3395 :
3396 : static void
3397 0 : bdev_nvme_reverse_sequence(void *seq)
3398 : {
3399 0 : spdk_accel_sequence_reverse(seq);
3400 0 : }
3401 :
3402 : static int
3403 0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
3404 : struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
3405 : spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
3406 : {
3407 : struct spdk_io_channel *ch;
3408 0 : struct nvme_poll_group *group = ctx;
3409 :
3410 0 : ch = bdev_nvme_get_accel_channel(group);
3411 0 : if (spdk_unlikely(ch == NULL)) {
3412 0 : return -ENOMEM;
3413 : }
3414 :
3415 0 : return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
3416 : domain, domain_ctx, seed, cb_fn, cb_arg);
3417 : }
3418 :
3419 : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
3420 : .table_size = sizeof(struct spdk_nvme_accel_fn_table),
3421 : .submit_accel_crc32c = bdev_nvme_submit_accel_crc32c,
3422 : .append_crc32c = bdev_nvme_append_crc32c,
3423 : .finish_sequence = bdev_nvme_finish_sequence,
3424 : .reverse_sequence = bdev_nvme_reverse_sequence,
3425 : .abort_sequence = bdev_nvme_abort_sequence,
3426 : };
3427 :
3428 : static int
3429 42 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
3430 : {
3431 42 : struct nvme_poll_group *group = ctx_buf;
3432 :
3433 42 : TAILQ_INIT(&group->qpair_list);
3434 :
3435 42 : group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
3436 42 : if (group->group == NULL) {
3437 0 : return -1;
3438 : }
3439 :
3440 42 : group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
3441 :
3442 42 : if (group->poller == NULL) {
3443 0 : spdk_nvme_poll_group_destroy(group->group);
3444 0 : return -1;
3445 : }
3446 :
3447 42 : return 0;
3448 : }
3449 :
3450 : static void
3451 42 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
3452 : {
3453 42 : struct nvme_poll_group *group = ctx_buf;
3454 :
3455 42 : assert(TAILQ_EMPTY(&group->qpair_list));
3456 :
3457 42 : if (group->accel_channel) {
3458 0 : spdk_put_io_channel(group->accel_channel);
3459 : }
3460 :
3461 42 : spdk_poller_unregister(&group->poller);
3462 42 : if (spdk_nvme_poll_group_destroy(group->group)) {
3463 0 : SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
3464 0 : assert(false);
3465 : }
3466 42 : }
3467 :
3468 : static struct spdk_io_channel *
3469 0 : bdev_nvme_get_io_channel(void *ctx)
3470 : {
3471 0 : struct nvme_bdev *nvme_bdev = ctx;
3472 :
3473 0 : return spdk_get_io_channel(nvme_bdev);
3474 : }
3475 :
3476 : static void *
3477 0 : bdev_nvme_get_module_ctx(void *ctx)
3478 : {
3479 0 : struct nvme_bdev *nvme_bdev = ctx;
3480 : struct nvme_ns *nvme_ns;
3481 :
3482 0 : if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
3483 0 : return NULL;
3484 : }
3485 :
3486 0 : nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
3487 0 : if (!nvme_ns) {
3488 0 : return NULL;
3489 : }
3490 :
3491 0 : return nvme_ns->ns;
3492 : }
3493 :
3494 : static const char *
3495 0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
3496 : {
3497 0 : switch (ana_state) {
3498 0 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
3499 0 : return "optimized";
3500 0 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
3501 0 : return "non_optimized";
3502 0 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3503 0 : return "inaccessible";
3504 0 : case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3505 0 : return "persistent_loss";
3506 0 : case SPDK_NVME_ANA_CHANGE_STATE:
3507 0 : return "change";
3508 0 : default:
3509 0 : return NULL;
3510 : }
3511 : }
3512 :
3513 : static int
3514 8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
3515 : {
3516 8 : struct spdk_memory_domain **_domains = NULL;
3517 8 : struct nvme_bdev *nbdev = ctx;
3518 : struct nvme_ns *nvme_ns;
3519 8 : int i = 0, _array_size = array_size;
3520 8 : int rc = 0;
3521 :
3522 22 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
3523 14 : if (domains && array_size >= i) {
3524 11 : _domains = &domains[i];
3525 : } else {
3526 3 : _domains = NULL;
3527 : }
3528 14 : rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
3529 14 : if (rc > 0) {
3530 13 : i += rc;
3531 13 : if (_array_size >= rc) {
3532 9 : _array_size -= rc;
3533 : } else {
3534 4 : _array_size = 0;
3535 : }
3536 1 : } else if (rc < 0) {
3537 0 : return rc;
3538 : }
3539 : }
3540 :
3541 8 : return i;
3542 : }
3543 :
3544 : static const char *
3545 0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
3546 : {
3547 0 : if (nvme_ctrlr->destruct) {
3548 0 : return "deleting";
3549 0 : } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
3550 0 : return "failed";
3551 0 : } else if (nvme_ctrlr->resetting) {
3552 0 : return "resetting";
3553 0 : } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
3554 0 : return "reconnect_is_delayed";
3555 0 : } else if (nvme_ctrlr->disabled) {
3556 0 : return "disabled";
3557 : } else {
3558 0 : return "enabled";
3559 : }
3560 : }
3561 :
3562 : void
3563 0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
3564 0 : {
3565 : struct spdk_nvme_transport_id *trid;
3566 : const struct spdk_nvme_ctrlr_opts *opts;
3567 : const struct spdk_nvme_ctrlr_data *cdata;
3568 : struct nvme_path_id *path_id;
3569 :
3570 0 : spdk_json_write_object_begin(w);
3571 :
3572 0 : spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
3573 :
3574 : #ifdef SPDK_CONFIG_NVME_CUSE
3575 0 : size_t cuse_name_size = 128;
3576 0 : char cuse_name[cuse_name_size];
3577 :
3578 0 : int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
3579 0 : if (rc == 0) {
3580 0 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
3581 : }
3582 : #endif
3583 0 : trid = &nvme_ctrlr->active_path_id->trid;
3584 0 : spdk_json_write_named_object_begin(w, "trid");
3585 0 : nvme_bdev_dump_trid_json(trid, w);
3586 0 : spdk_json_write_object_end(w);
3587 :
3588 0 : path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
3589 0 : if (path_id != NULL) {
3590 0 : spdk_json_write_named_array_begin(w, "alternate_trids");
3591 : do {
3592 0 : trid = &path_id->trid;
3593 0 : spdk_json_write_object_begin(w);
3594 0 : nvme_bdev_dump_trid_json(trid, w);
3595 0 : spdk_json_write_object_end(w);
3596 :
3597 0 : path_id = TAILQ_NEXT(path_id, link);
3598 0 : } while (path_id != NULL);
3599 0 : spdk_json_write_array_end(w);
3600 : }
3601 :
3602 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
3603 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
3604 :
3605 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
3606 0 : spdk_json_write_named_object_begin(w, "host");
3607 0 : spdk_json_write_named_string(w, "nqn", opts->hostnqn);
3608 0 : spdk_json_write_named_string(w, "addr", opts->src_addr);
3609 0 : spdk_json_write_named_string(w, "svcid", opts->src_svcid);
3610 0 : spdk_json_write_object_end(w);
3611 :
3612 0 : spdk_json_write_object_end(w);
3613 0 : }
3614 :
3615 : static void
3616 0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
3617 : struct nvme_ns *nvme_ns)
3618 0 : {
3619 : struct spdk_nvme_ns *ns;
3620 : struct spdk_nvme_ctrlr *ctrlr;
3621 : const struct spdk_nvme_ctrlr_data *cdata;
3622 : const struct spdk_nvme_transport_id *trid;
3623 : union spdk_nvme_vs_register vs;
3624 : const struct spdk_nvme_ns_data *nsdata;
3625 0 : char buf[128];
3626 :
3627 0 : ns = nvme_ns->ns;
3628 0 : if (ns == NULL) {
3629 0 : return;
3630 : }
3631 :
3632 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3633 :
3634 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3635 0 : trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
3636 0 : vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
3637 :
3638 0 : spdk_json_write_object_begin(w);
3639 :
3640 0 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
3641 0 : spdk_json_write_named_string(w, "pci_address", trid->traddr);
3642 : }
3643 :
3644 0 : spdk_json_write_named_object_begin(w, "trid");
3645 :
3646 0 : nvme_bdev_dump_trid_json(trid, w);
3647 :
3648 0 : spdk_json_write_object_end(w);
3649 :
3650 : #ifdef SPDK_CONFIG_NVME_CUSE
3651 0 : size_t cuse_name_size = 128;
3652 0 : char cuse_name[cuse_name_size];
3653 :
3654 0 : int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
3655 : cuse_name, &cuse_name_size);
3656 0 : if (rc == 0) {
3657 0 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
3658 : }
3659 : #endif
3660 :
3661 0 : spdk_json_write_named_object_begin(w, "ctrlr_data");
3662 :
3663 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
3664 :
3665 0 : spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
3666 :
3667 0 : snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
3668 0 : spdk_str_trim(buf);
3669 0 : spdk_json_write_named_string(w, "model_number", buf);
3670 :
3671 0 : snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
3672 0 : spdk_str_trim(buf);
3673 0 : spdk_json_write_named_string(w, "serial_number", buf);
3674 :
3675 0 : snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
3676 0 : spdk_str_trim(buf);
3677 0 : spdk_json_write_named_string(w, "firmware_revision", buf);
3678 :
3679 0 : if (cdata->subnqn[0] != '\0') {
3680 0 : spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
3681 : }
3682 :
3683 0 : spdk_json_write_named_object_begin(w, "oacs");
3684 :
3685 0 : spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
3686 0 : spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
3687 0 : spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
3688 0 : spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
3689 :
3690 0 : spdk_json_write_object_end(w);
3691 :
3692 0 : spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
3693 0 : spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
3694 :
3695 0 : spdk_json_write_object_end(w);
3696 :
3697 0 : spdk_json_write_named_object_begin(w, "vs");
3698 :
3699 0 : spdk_json_write_name(w, "nvme_version");
3700 0 : if (vs.bits.ter) {
3701 0 : spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
3702 : } else {
3703 0 : spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
3704 : }
3705 :
3706 0 : spdk_json_write_object_end(w);
3707 :
3708 0 : nsdata = spdk_nvme_ns_get_data(ns);
3709 :
3710 0 : spdk_json_write_named_object_begin(w, "ns_data");
3711 :
3712 0 : spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
3713 :
3714 0 : if (cdata->cmic.ana_reporting) {
3715 0 : spdk_json_write_named_string(w, "ana_state",
3716 : _nvme_ana_state_str(nvme_ns->ana_state));
3717 : }
3718 :
3719 0 : spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
3720 :
3721 0 : spdk_json_write_object_end(w);
3722 :
3723 0 : if (cdata->oacs.security) {
3724 0 : spdk_json_write_named_object_begin(w, "security");
3725 :
3726 0 : spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
3727 :
3728 0 : spdk_json_write_object_end(w);
3729 : }
3730 :
3731 0 : spdk_json_write_object_end(w);
3732 : }
3733 :
3734 : static const char *
3735 0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
3736 : {
3737 0 : switch (nbdev->mp_policy) {
3738 0 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
3739 0 : return "active_passive";
3740 0 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
3741 0 : return "active_active";
3742 0 : default:
3743 0 : assert(false);
3744 : return "invalid";
3745 : }
3746 : }
3747 :
3748 : static int
3749 0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
3750 : {
3751 0 : struct nvme_bdev *nvme_bdev = ctx;
3752 : struct nvme_ns *nvme_ns;
3753 :
3754 0 : pthread_mutex_lock(&nvme_bdev->mutex);
3755 0 : spdk_json_write_named_array_begin(w, "nvme");
3756 0 : TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
3757 0 : nvme_namespace_info_json(w, nvme_ns);
3758 : }
3759 0 : spdk_json_write_array_end(w);
3760 0 : spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
3761 0 : pthread_mutex_unlock(&nvme_bdev->mutex);
3762 :
3763 0 : return 0;
3764 : }
3765 :
3766 : static void
3767 0 : bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
3768 : {
3769 : /* No config per bdev needed */
3770 0 : }
3771 :
3772 : static uint64_t
3773 0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
3774 : {
3775 0 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
3776 : struct nvme_io_path *io_path;
3777 : struct nvme_poll_group *group;
3778 0 : uint64_t spin_time = 0;
3779 :
3780 0 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
3781 0 : group = io_path->qpair->group;
3782 :
3783 0 : if (!group || !group->collect_spin_stat) {
3784 0 : continue;
3785 : }
3786 :
3787 0 : if (group->end_ticks != 0) {
3788 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
3789 0 : group->end_ticks = 0;
3790 : }
3791 :
3792 0 : spin_time += group->spin_ticks;
3793 0 : group->start_ticks = 0;
3794 0 : group->spin_ticks = 0;
3795 : }
3796 :
3797 0 : return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
3798 : }
3799 :
3800 : static void
3801 0 : bdev_nvme_reset_device_stat(void *ctx)
3802 : {
3803 0 : struct nvme_bdev *nbdev = ctx;
3804 :
3805 0 : if (nbdev->err_stat != NULL) {
3806 0 : memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
3807 : }
3808 0 : }
3809 :
3810 : /* JSON string should be lowercases and underscore delimited string. */
3811 : static void
3812 0 : bdev_nvme_format_nvme_status(char *dst, const char *src)
3813 : {
3814 0 : char tmp[256];
3815 :
3816 0 : spdk_strcpy_replace(dst, 256, src, " - ", "_");
3817 0 : spdk_strcpy_replace(tmp, 256, dst, "-", "_");
3818 0 : spdk_strcpy_replace(dst, 256, tmp, " ", "_");
3819 0 : spdk_strlwr(dst);
3820 0 : }
3821 :
3822 : static void
3823 0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
3824 : {
3825 0 : struct nvme_bdev *nbdev = ctx;
3826 0 : struct spdk_nvme_status status = {};
3827 : uint16_t sct, sc;
3828 0 : char status_json[256];
3829 : const char *status_str;
3830 :
3831 0 : if (nbdev->err_stat == NULL) {
3832 0 : return;
3833 : }
3834 :
3835 0 : spdk_json_write_named_object_begin(w, "nvme_error");
3836 :
3837 0 : spdk_json_write_named_object_begin(w, "status_type");
3838 0 : for (sct = 0; sct < 8; sct++) {
3839 0 : if (nbdev->err_stat->status_type[sct] == 0) {
3840 0 : continue;
3841 : }
3842 0 : status.sct = sct;
3843 :
3844 0 : status_str = spdk_nvme_cpl_get_status_type_string(&status);
3845 0 : assert(status_str != NULL);
3846 0 : bdev_nvme_format_nvme_status(status_json, status_str);
3847 :
3848 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
3849 : }
3850 0 : spdk_json_write_object_end(w);
3851 :
3852 0 : spdk_json_write_named_object_begin(w, "status_code");
3853 0 : for (sct = 0; sct < 4; sct++) {
3854 0 : status.sct = sct;
3855 0 : for (sc = 0; sc < 256; sc++) {
3856 0 : if (nbdev->err_stat->status[sct][sc] == 0) {
3857 0 : continue;
3858 : }
3859 0 : status.sc = sc;
3860 :
3861 0 : status_str = spdk_nvme_cpl_get_status_string(&status);
3862 0 : assert(status_str != NULL);
3863 0 : bdev_nvme_format_nvme_status(status_json, status_str);
3864 :
3865 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
3866 : }
3867 : }
3868 0 : spdk_json_write_object_end(w);
3869 :
3870 0 : spdk_json_write_object_end(w);
3871 : }
3872 :
3873 : static bool
3874 0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
3875 : {
3876 0 : struct nvme_bdev *nbdev = ctx;
3877 : struct spdk_nvme_ctrlr *ctrlr;
3878 :
3879 0 : if (!g_opts.allow_accel_sequence) {
3880 0 : return false;
3881 : }
3882 :
3883 0 : switch (type) {
3884 0 : case SPDK_BDEV_IO_TYPE_WRITE:
3885 : case SPDK_BDEV_IO_TYPE_READ:
3886 0 : break;
3887 0 : default:
3888 0 : return false;
3889 : }
3890 :
3891 0 : ctrlr = bdev_nvme_get_ctrlr(&nbdev->disk);
3892 0 : assert(ctrlr != NULL);
3893 :
3894 0 : return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
3895 : }
3896 :
3897 : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
3898 : .destruct = bdev_nvme_destruct,
3899 : .submit_request = bdev_nvme_submit_request,
3900 : .io_type_supported = bdev_nvme_io_type_supported,
3901 : .get_io_channel = bdev_nvme_get_io_channel,
3902 : .dump_info_json = bdev_nvme_dump_info_json,
3903 : .write_config_json = bdev_nvme_write_config_json,
3904 : .get_spin_time = bdev_nvme_get_spin_time,
3905 : .get_module_ctx = bdev_nvme_get_module_ctx,
3906 : .get_memory_domains = bdev_nvme_get_memory_domains,
3907 : .accel_sequence_supported = bdev_nvme_accel_sequence_supported,
3908 : .reset_device_stat = bdev_nvme_reset_device_stat,
3909 : .dump_device_stat_json = bdev_nvme_dump_device_stat_json,
3910 : };
3911 :
3912 : typedef int (*bdev_nvme_parse_ana_log_page_cb)(
3913 : const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);
3914 :
3915 : static int
3916 40 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
3917 : bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
3918 : {
3919 : struct spdk_nvme_ana_group_descriptor *copied_desc;
3920 : uint8_t *orig_desc;
3921 : uint32_t i, desc_size, copy_len;
3922 40 : int rc = 0;
3923 :
3924 40 : if (nvme_ctrlr->ana_log_page == NULL) {
3925 0 : return -EINVAL;
3926 : }
3927 :
3928 40 : copied_desc = nvme_ctrlr->copied_ana_desc;
3929 :
3930 40 : orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
3931 40 : copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
3932 :
3933 69 : for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
3934 65 : memcpy(copied_desc, orig_desc, copy_len);
3935 :
3936 65 : rc = cb_fn(copied_desc, cb_arg);
3937 65 : if (rc != 0) {
3938 36 : break;
3939 : }
3940 :
3941 29 : desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
3942 29 : copied_desc->num_of_nsid * sizeof(uint32_t);
3943 29 : orig_desc += desc_size;
3944 29 : copy_len -= desc_size;
3945 : }
3946 :
3947 40 : return rc;
3948 : }
3949 :
3950 : static int
3951 5 : nvme_ns_ana_transition_timedout(void *ctx)
3952 : {
3953 5 : struct nvme_ns *nvme_ns = ctx;
3954 :
3955 5 : spdk_poller_unregister(&nvme_ns->anatt_timer);
3956 5 : nvme_ns->ana_transition_timedout = true;
3957 :
3958 5 : return SPDK_POLLER_BUSY;
3959 : }
3960 :
3961 : static void
3962 45 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
3963 : const struct spdk_nvme_ana_group_descriptor *desc)
3964 : {
3965 : const struct spdk_nvme_ctrlr_data *cdata;
3966 :
3967 45 : nvme_ns->ana_group_id = desc->ana_group_id;
3968 45 : nvme_ns->ana_state = desc->ana_state;
3969 45 : nvme_ns->ana_state_updating = false;
3970 :
3971 45 : switch (nvme_ns->ana_state) {
3972 38 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
3973 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
3974 38 : nvme_ns->ana_transition_timedout = false;
3975 38 : spdk_poller_unregister(&nvme_ns->anatt_timer);
3976 38 : break;
3977 :
3978 6 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3979 : case SPDK_NVME_ANA_CHANGE_STATE:
3980 6 : if (nvme_ns->anatt_timer != NULL) {
3981 1 : break;
3982 : }
3983 :
3984 5 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
3985 5 : nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
3986 : nvme_ns,
3987 : cdata->anatt * SPDK_SEC_TO_USEC);
3988 5 : break;
3989 1 : default:
3990 1 : break;
3991 : }
3992 45 : }
3993 :
3994 : static int
3995 59 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
3996 : {
3997 59 : struct nvme_ns *nvme_ns = cb_arg;
3998 : uint32_t i;
3999 :
4000 59 : assert(nvme_ns->ns != NULL);
4001 :
4002 81 : for (i = 0; i < desc->num_of_nsid; i++) {
4003 58 : if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
4004 22 : continue;
4005 : }
4006 :
4007 36 : _nvme_ns_set_ana_state(nvme_ns, desc);
4008 36 : return 1;
4009 : }
4010 :
4011 23 : return 0;
4012 : }
4013 :
4014 : static struct spdk_uuid
4015 5 : nvme_generate_uuid(const char *sn, uint32_t nsid)
4016 : {
4017 5 : struct spdk_uuid new_uuid, namespace_uuid;
4018 5 : char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
4019 : /* This namespace UUID was generated using uuid_generate() method. */
4020 5 : const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
4021 : int size;
4022 :
4023 5 : assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
4024 :
4025 5 : spdk_uuid_set_null(&new_uuid);
4026 5 : spdk_uuid_set_null(&namespace_uuid);
4027 :
4028 5 : size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
4029 5 : assert(size > 0 && (unsigned long)size < sizeof(merged_str));
4030 :
4031 5 : spdk_uuid_parse(&namespace_uuid, namespace_str);
4032 :
4033 5 : spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
4034 :
4035 5 : return new_uuid;
4036 : }
4037 :
4038 : static int
4039 37 : nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
4040 : struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
4041 : uint32_t prchk_flags, void *ctx)
4042 : {
4043 : const struct spdk_uuid *uuid;
4044 : const uint8_t *nguid;
4045 : const struct spdk_nvme_ctrlr_data *cdata;
4046 : const struct spdk_nvme_ns_data *nsdata;
4047 : const struct spdk_nvme_ctrlr_opts *opts;
4048 : enum spdk_nvme_csi csi;
4049 : uint32_t atomic_bs, phys_bs, bs;
4050 37 : char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
4051 :
4052 37 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4053 37 : csi = spdk_nvme_ns_get_csi(ns);
4054 37 : opts = spdk_nvme_ctrlr_get_opts(ctrlr);
4055 :
4056 37 : switch (csi) {
4057 37 : case SPDK_NVME_CSI_NVM:
4058 37 : disk->product_name = "NVMe disk";
4059 37 : break;
4060 0 : case SPDK_NVME_CSI_ZNS:
4061 0 : disk->product_name = "NVMe ZNS disk";
4062 0 : disk->zoned = true;
4063 0 : disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
4064 0 : disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
4065 0 : spdk_nvme_ns_get_extended_sector_size(ns);
4066 0 : disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
4067 0 : disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
4068 0 : break;
4069 0 : default:
4070 0 : SPDK_ERRLOG("unsupported CSI: %u\n", csi);
4071 0 : return -ENOTSUP;
4072 : }
4073 :
4074 37 : disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
4075 37 : if (!disk->name) {
4076 0 : return -ENOMEM;
4077 : }
4078 :
4079 37 : disk->write_cache = 0;
4080 37 : if (cdata->vwc.present) {
4081 : /* Enable if the Volatile Write Cache exists */
4082 0 : disk->write_cache = 1;
4083 : }
4084 37 : if (cdata->oncs.write_zeroes) {
4085 0 : disk->max_write_zeroes = UINT16_MAX + 1;
4086 : }
4087 37 : disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
4088 37 : disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
4089 37 : disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
4090 : /* NVMe driver will split one request into multiple requests
4091 : * based on MDTS and stripe boundary, the bdev layer will use
4092 : * max_segment_size and max_num_segments to split one big IO
4093 : * into multiple requests, then small request can't run out
4094 : * of NVMe internal requests data structure.
4095 : */
4096 37 : if (opts && opts->io_queue_requests) {
4097 0 : disk->max_num_segments = opts->io_queue_requests / 2;
4098 : }
4099 37 : disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
4100 :
4101 37 : nguid = spdk_nvme_ns_get_nguid(ns);
4102 37 : if (!nguid) {
4103 37 : uuid = spdk_nvme_ns_get_uuid(ns);
4104 37 : if (uuid) {
4105 12 : disk->uuid = *uuid;
4106 25 : } else if (g_opts.generate_uuids) {
4107 0 : spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
4108 0 : disk->uuid = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns));
4109 : }
4110 : } else {
4111 0 : memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
4112 : }
4113 :
4114 37 : nsdata = spdk_nvme_ns_get_data(ns);
4115 37 : bs = spdk_nvme_ns_get_sector_size(ns);
4116 37 : atomic_bs = bs;
4117 37 : phys_bs = bs;
4118 37 : if (nsdata->nabo == 0) {
4119 37 : if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
4120 0 : atomic_bs = bs * (1 + nsdata->nawupf);
4121 : } else {
4122 37 : atomic_bs = bs * (1 + cdata->awupf);
4123 : }
4124 : }
4125 37 : if (nsdata->nsfeat.optperf) {
4126 0 : phys_bs = bs * (1 + nsdata->npwg);
4127 : }
4128 37 : disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
4129 :
4130 37 : disk->md_len = spdk_nvme_ns_get_md_size(ns);
4131 37 : if (disk->md_len != 0) {
4132 0 : disk->md_interleave = nsdata->flbas.extended;
4133 0 : disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
4134 0 : if (disk->dif_type != SPDK_DIF_DISABLE) {
4135 0 : disk->dif_is_head_of_md = nsdata->dps.md_start;
4136 0 : disk->dif_check_flags = prchk_flags;
4137 : }
4138 : }
4139 :
4140 37 : if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
4141 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
4142 37 : disk->acwu = 0;
4143 0 : } else if (nsdata->nsfeat.ns_atomic_write_unit) {
4144 0 : disk->acwu = nsdata->nacwu + 1; /* 0-based */
4145 : } else {
4146 0 : disk->acwu = cdata->acwu + 1; /* 0-based */
4147 : }
4148 :
4149 37 : if (cdata->oncs.copy) {
4150 : /* For now bdev interface allows only single segment copy */
4151 0 : disk->max_copy = nsdata->mssrl;
4152 : }
4153 :
4154 37 : disk->ctxt = ctx;
4155 37 : disk->fn_table = &nvmelib_fn_table;
4156 37 : disk->module = &nvme_if;
4157 :
4158 37 : return 0;
4159 : }
4160 :
4161 : static struct nvme_bdev *
4162 37 : nvme_bdev_alloc(void)
4163 : {
4164 : struct nvme_bdev *bdev;
4165 : int rc;
4166 :
4167 37 : bdev = calloc(1, sizeof(*bdev));
4168 37 : if (!bdev) {
4169 0 : SPDK_ERRLOG("bdev calloc() failed\n");
4170 0 : return NULL;
4171 : }
4172 :
4173 37 : if (g_opts.nvme_error_stat) {
4174 0 : bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
4175 0 : if (!bdev->err_stat) {
4176 0 : SPDK_ERRLOG("err_stat calloc() failed\n");
4177 0 : free(bdev);
4178 0 : return NULL;
4179 : }
4180 : }
4181 :
4182 37 : rc = pthread_mutex_init(&bdev->mutex, NULL);
4183 37 : if (rc != 0) {
4184 0 : free(bdev->err_stat);
4185 0 : free(bdev);
4186 0 : return NULL;
4187 : }
4188 :
4189 37 : bdev->ref = 1;
4190 37 : bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
4191 37 : bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
4192 37 : bdev->rr_min_io = UINT32_MAX;
4193 37 : TAILQ_INIT(&bdev->nvme_ns_list);
4194 :
4195 37 : return bdev;
4196 : }
4197 :
4198 : static int
4199 37 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4200 : {
4201 : struct nvme_bdev *bdev;
4202 37 : struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
4203 : int rc;
4204 :
4205 37 : bdev = nvme_bdev_alloc();
4206 37 : if (bdev == NULL) {
4207 0 : SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
4208 0 : return -ENOMEM;
4209 : }
4210 :
4211 37 : bdev->opal = nvme_ctrlr->opal_dev != NULL;
4212 :
4213 37 : rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
4214 : nvme_ns->ns, nvme_ctrlr->opts.prchk_flags, bdev);
4215 37 : if (rc != 0) {
4216 0 : SPDK_ERRLOG("Failed to create NVMe disk\n");
4217 0 : nvme_bdev_free(bdev);
4218 0 : return rc;
4219 : }
4220 :
4221 37 : spdk_io_device_register(bdev,
4222 : bdev_nvme_create_bdev_channel_cb,
4223 : bdev_nvme_destroy_bdev_channel_cb,
4224 : sizeof(struct nvme_bdev_channel),
4225 37 : bdev->disk.name);
4226 :
4227 37 : nvme_ns->bdev = bdev;
4228 37 : bdev->nsid = nvme_ns->id;
4229 37 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4230 :
4231 37 : bdev->nbdev_ctrlr = nbdev_ctrlr;
4232 37 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);
4233 :
4234 37 : rc = spdk_bdev_register(&bdev->disk);
4235 37 : if (rc != 0) {
4236 1 : SPDK_ERRLOG("spdk_bdev_register() failed\n");
4237 1 : spdk_io_device_unregister(bdev, NULL);
4238 1 : nvme_ns->bdev = NULL;
4239 1 : TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
4240 1 : nvme_bdev_free(bdev);
4241 1 : return rc;
4242 : }
4243 :
4244 36 : return 0;
4245 : }
4246 :
4247 : static bool
4248 23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
4249 : {
4250 : const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
4251 : const struct spdk_uuid *uuid1, *uuid2;
4252 :
4253 23 : nsdata1 = spdk_nvme_ns_get_data(ns1);
4254 23 : nsdata2 = spdk_nvme_ns_get_data(ns2);
4255 23 : uuid1 = spdk_nvme_ns_get_uuid(ns1);
4256 23 : uuid2 = spdk_nvme_ns_get_uuid(ns2);
4257 :
4258 45 : return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
4259 22 : nsdata1->eui64 == nsdata2->eui64 &&
4260 21 : ((uuid1 == NULL && uuid2 == NULL) ||
4261 59 : (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
4262 18 : spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
4263 : }
4264 :
4265 : static bool
4266 0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
4267 : struct spdk_nvme_ctrlr_opts *opts)
4268 : {
4269 : struct nvme_probe_skip_entry *entry;
4270 :
4271 0 : TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
4272 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
4273 0 : return false;
4274 : }
4275 : }
4276 :
4277 0 : opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
4278 0 : opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
4279 0 : opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
4280 0 : opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
4281 0 : opts->disable_read_ana_log_page = true;
4282 :
4283 0 : SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
4284 :
4285 0 : return true;
4286 : }
4287 :
4288 : static void
4289 0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
4290 : {
4291 0 : struct nvme_ctrlr *nvme_ctrlr = ctx;
4292 :
4293 0 : if (spdk_nvme_cpl_is_error(cpl)) {
4294 0 : SPDK_WARNLOG("Abort failed. Resetting controller. sc is %u, sct is %u.\n", cpl->status.sc,
4295 : cpl->status.sct);
4296 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4297 0 : } else if (cpl->cdw0 & 0x1) {
4298 0 : SPDK_WARNLOG("Specified command could not be aborted.\n");
4299 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4300 : }
4301 0 : }
4302 :
4303 : static void
4304 0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
4305 : struct spdk_nvme_qpair *qpair, uint16_t cid)
4306 : {
4307 0 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
4308 : union spdk_nvme_csts_register csts;
4309 : int rc;
4310 :
4311 0 : assert(nvme_ctrlr->ctrlr == ctrlr);
4312 :
4313 0 : SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
4314 :
4315 : /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
4316 : * queue. (Note: qpair == NULL when there's an admin cmd timeout.) Otherwise we
4317 : * would submit another fabrics cmd on the admin queue to read CSTS and check for its
4318 : * completion recursively.
4319 : */
4320 0 : if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
4321 0 : csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
4322 0 : if (csts.bits.cfs) {
4323 0 : SPDK_ERRLOG("Controller Fatal Status, reset required\n");
4324 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4325 0 : return;
4326 : }
4327 : }
4328 :
4329 0 : switch (g_opts.action_on_timeout) {
4330 0 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
4331 0 : if (qpair) {
4332 : /* Don't send abort to ctrlr when ctrlr is not available. */
4333 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4334 0 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
4335 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4336 0 : SPDK_NOTICELOG("Quit abort. Ctrlr is not available.\n");
4337 0 : return;
4338 : }
4339 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4340 :
4341 0 : rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
4342 : nvme_abort_cpl, nvme_ctrlr);
4343 0 : if (rc == 0) {
4344 0 : return;
4345 : }
4346 :
4347 0 : SPDK_ERRLOG("Unable to send abort. Resetting, rc is %d.\n", rc);
4348 : }
4349 :
4350 : /* FALLTHROUGH */
4351 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
4352 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4353 0 : break;
4354 0 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
4355 0 : SPDK_DEBUGLOG(bdev_nvme, "No action for nvme controller timeout.\n");
4356 0 : break;
4357 0 : default:
4358 0 : SPDK_ERRLOG("An invalid timeout action value is found.\n");
4359 0 : break;
4360 : }
4361 : }
4362 :
4363 : static struct nvme_ns *
4364 50 : nvme_ns_alloc(void)
4365 : {
4366 : struct nvme_ns *nvme_ns;
4367 :
4368 50 : nvme_ns = calloc(1, sizeof(struct nvme_ns));
4369 50 : if (nvme_ns == NULL) {
4370 0 : return NULL;
4371 : }
4372 :
4373 50 : if (g_opts.io_path_stat) {
4374 0 : nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
4375 0 : if (nvme_ns->stat == NULL) {
4376 0 : free(nvme_ns);
4377 0 : return NULL;
4378 : }
4379 0 : spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
4380 : }
4381 :
4382 50 : return nvme_ns;
4383 : }
4384 :
4385 : static void
4386 50 : nvme_ns_free(struct nvme_ns *nvme_ns)
4387 : {
4388 50 : free(nvme_ns->stat);
4389 50 : free(nvme_ns);
4390 50 : }
4391 :
4392 : static void
4393 50 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
4394 : {
4395 50 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4396 50 : struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
4397 :
4398 50 : if (rc == 0) {
4399 48 : nvme_ns->probe_ctx = NULL;
4400 48 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4401 48 : nvme_ctrlr->ref++;
4402 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4403 : } else {
4404 2 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4405 2 : nvme_ns_free(nvme_ns);
4406 : }
4407 :
4408 50 : if (ctx) {
4409 49 : ctx->populates_in_progress--;
4410 49 : if (ctx->populates_in_progress == 0) {
4411 12 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
4412 : }
4413 : }
4414 50 : }
4415 :
4416 : static void
4417 2 : bdev_nvme_add_io_path(struct spdk_io_channel_iter *i)
4418 : {
4419 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4420 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4421 2 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4422 : int rc;
4423 :
4424 2 : rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
4425 2 : if (rc != 0) {
4426 0 : SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
4427 : }
4428 :
4429 2 : spdk_for_each_channel_continue(i, rc);
4430 2 : }
4431 :
4432 : static void
4433 2 : bdev_nvme_delete_io_path(struct spdk_io_channel_iter *i)
4434 : {
4435 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4436 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4437 2 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4438 : struct nvme_io_path *io_path;
4439 :
4440 2 : io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
4441 2 : if (io_path != NULL) {
4442 2 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
4443 : }
4444 :
4445 2 : spdk_for_each_channel_continue(i, 0);
4446 2 : }
4447 :
4448 : static void
4449 0 : bdev_nvme_add_io_path_failed(struct spdk_io_channel_iter *i, int status)
4450 : {
4451 0 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4452 :
4453 0 : nvme_ctrlr_populate_namespace_done(nvme_ns, -1);
4454 0 : }
4455 :
4456 : static void
4457 12 : bdev_nvme_add_io_path_done(struct spdk_io_channel_iter *i, int status)
4458 : {
4459 12 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4460 12 : struct nvme_bdev *bdev = spdk_io_channel_iter_get_io_device(i);
4461 :
4462 12 : if (status == 0) {
4463 12 : nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
4464 : } else {
4465 : /* Delete the added io_paths and fail populating the namespace. */
4466 0 : spdk_for_each_channel(bdev,
4467 : bdev_nvme_delete_io_path,
4468 : nvme_ns,
4469 : bdev_nvme_add_io_path_failed);
4470 : }
4471 12 : }
4472 :
4473 : static int
4474 13 : nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
4475 : {
4476 : struct nvme_ns *tmp_ns;
4477 : const struct spdk_nvme_ns_data *nsdata;
4478 :
4479 13 : nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
4480 13 : if (!nsdata->nmic.can_share) {
4481 0 : SPDK_ERRLOG("Namespace cannot be shared.\n");
4482 0 : return -EINVAL;
4483 : }
4484 :
4485 13 : pthread_mutex_lock(&bdev->mutex);
4486 :
4487 13 : tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
4488 13 : assert(tmp_ns != NULL);
4489 :
4490 13 : if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
4491 1 : pthread_mutex_unlock(&bdev->mutex);
4492 1 : SPDK_ERRLOG("Namespaces are not identical.\n");
4493 1 : return -EINVAL;
4494 : }
4495 :
4496 12 : bdev->ref++;
4497 12 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4498 12 : nvme_ns->bdev = bdev;
4499 :
4500 12 : pthread_mutex_unlock(&bdev->mutex);
4501 :
4502 : /* Add nvme_io_path to nvme_bdev_channels dynamically. */
4503 12 : spdk_for_each_channel(bdev,
4504 : bdev_nvme_add_io_path,
4505 : nvme_ns,
4506 : bdev_nvme_add_io_path_done);
4507 :
4508 12 : return 0;
4509 : }
4510 :
4511 : static void
4512 50 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4513 : {
4514 : struct spdk_nvme_ns *ns;
4515 : struct nvme_bdev *bdev;
4516 50 : int rc = 0;
4517 :
4518 50 : ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
4519 50 : if (!ns) {
4520 0 : SPDK_DEBUGLOG(bdev_nvme, "Invalid NS %d\n", nvme_ns->id);
4521 0 : rc = -EINVAL;
4522 0 : goto done;
4523 : }
4524 :
4525 50 : nvme_ns->ns = ns;
4526 50 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
4527 :
4528 50 : if (nvme_ctrlr->ana_log_page != NULL) {
4529 37 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
4530 : }
4531 :
4532 50 : bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
4533 50 : if (bdev == NULL) {
4534 37 : rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
4535 : } else {
4536 13 : rc = nvme_bdev_add_ns(bdev, nvme_ns);
4537 13 : if (rc == 0) {
4538 12 : return;
4539 : }
4540 : }
4541 1 : done:
4542 38 : nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
4543 : }
4544 :
4545 : static void
4546 48 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
4547 : {
4548 48 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4549 :
4550 48 : assert(nvme_ctrlr != NULL);
4551 :
4552 48 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4553 :
4554 48 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4555 :
4556 48 : if (nvme_ns->bdev != NULL) {
4557 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4558 0 : return;
4559 : }
4560 :
4561 48 : nvme_ns_free(nvme_ns);
4562 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4563 :
4564 48 : nvme_ctrlr_release(nvme_ctrlr);
4565 : }
4566 :
4567 : static void
4568 11 : bdev_nvme_delete_io_path_done(struct spdk_io_channel_iter *i, int status)
4569 : {
4570 11 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4571 :
4572 11 : nvme_ctrlr_depopulate_namespace_done(nvme_ns);
4573 11 : }
4574 :
4575 : static void
4576 48 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4577 : {
4578 : struct nvme_bdev *bdev;
4579 :
4580 48 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4581 :
4582 48 : bdev = nvme_ns->bdev;
4583 48 : if (bdev != NULL) {
4584 44 : pthread_mutex_lock(&bdev->mutex);
4585 :
4586 44 : assert(bdev->ref > 0);
4587 44 : bdev->ref--;
4588 44 : if (bdev->ref == 0) {
4589 33 : pthread_mutex_unlock(&bdev->mutex);
4590 :
4591 33 : spdk_bdev_unregister(&bdev->disk, NULL, NULL);
4592 : } else {
4593 : /* spdk_bdev_unregister() is not called until the last nvme_ns is
4594 : * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
4595 : * and clear nvme_ns->bdev here.
4596 : */
4597 11 : TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
4598 11 : nvme_ns->bdev = NULL;
4599 :
4600 11 : pthread_mutex_unlock(&bdev->mutex);
4601 :
4602 : /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
4603 : * we call depopulate_namespace_done() to avoid use-after-free.
4604 : */
4605 11 : spdk_for_each_channel(bdev,
4606 : bdev_nvme_delete_io_path,
4607 : nvme_ns,
4608 : bdev_nvme_delete_io_path_done);
4609 11 : return;
4610 : }
4611 : }
4612 :
4613 37 : nvme_ctrlr_depopulate_namespace_done(nvme_ns);
4614 : }
4615 :
4616 : static void
4617 61 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
4618 : struct nvme_async_probe_ctx *ctx)
4619 : {
4620 61 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
4621 : struct nvme_ns *nvme_ns, *next;
4622 : struct spdk_nvme_ns *ns;
4623 : struct nvme_bdev *bdev;
4624 : uint32_t nsid;
4625 : int rc;
4626 : uint64_t num_sectors;
4627 :
4628 61 : if (ctx) {
4629 : /* Initialize this count to 1 to handle the populate functions
4630 : * calling nvme_ctrlr_populate_namespace_done() immediately.
4631 : */
4632 45 : ctx->populates_in_progress = 1;
4633 : }
4634 :
4635 : /* First loop over our existing namespaces and see if they have been
4636 : * removed. */
4637 61 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
4638 65 : while (nvme_ns != NULL) {
4639 4 : next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
4640 :
4641 4 : if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
4642 : /* NS is still there or added again. Its attributes may have changed. */
4643 3 : ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
4644 3 : if (nvme_ns->ns != ns) {
4645 1 : assert(nvme_ns->ns == NULL);
4646 1 : nvme_ns->ns = ns;
4647 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was added\n", nvme_ns->id);
4648 : }
4649 :
4650 3 : num_sectors = spdk_nvme_ns_get_num_sectors(ns);
4651 3 : bdev = nvme_ns->bdev;
4652 3 : assert(bdev != NULL);
4653 3 : if (bdev->disk.blockcnt != num_sectors) {
4654 1 : SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
4655 : nvme_ns->id,
4656 : bdev->disk.name,
4657 : bdev->disk.blockcnt,
4658 : num_sectors);
4659 1 : rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
4660 1 : if (rc != 0) {
4661 0 : SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
4662 : bdev->disk.name, rc);
4663 : }
4664 : }
4665 : } else {
4666 : /* Namespace was removed */
4667 1 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
4668 : }
4669 :
4670 4 : nvme_ns = next;
4671 : }
4672 :
4673 : /* Loop through all of the namespaces at the nvme level and see if any of them are new */
4674 61 : nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
4675 114 : while (nsid != 0) {
4676 53 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
4677 :
4678 53 : if (nvme_ns == NULL) {
4679 : /* Found a new one */
4680 50 : nvme_ns = nvme_ns_alloc();
4681 50 : if (nvme_ns == NULL) {
4682 0 : SPDK_ERRLOG("Failed to allocate namespace\n");
4683 : /* This just fails to attach the namespace. It may work on a future attempt. */
4684 0 : continue;
4685 : }
4686 :
4687 50 : nvme_ns->id = nsid;
4688 50 : nvme_ns->ctrlr = nvme_ctrlr;
4689 :
4690 50 : nvme_ns->bdev = NULL;
4691 :
4692 50 : if (ctx) {
4693 49 : ctx->populates_in_progress++;
4694 : }
4695 50 : nvme_ns->probe_ctx = ctx;
4696 :
4697 50 : RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4698 :
4699 50 : nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
4700 : }
4701 :
4702 53 : nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
4703 : }
4704 :
4705 61 : if (ctx) {
4706 : /* Decrement this count now that the loop is over to account
4707 : * for the one we started with. If the count is then 0, we
4708 : * know any populate_namespace functions completed immediately,
4709 : * so we'll kick the callback here.
4710 : */
4711 45 : ctx->populates_in_progress--;
4712 45 : if (ctx->populates_in_progress == 0) {
4713 33 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
4714 : }
4715 : }
4716 :
4717 61 : }
4718 :
4719 : static void
4720 59 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
4721 : {
4722 : struct nvme_ns *nvme_ns, *tmp;
4723 :
4724 106 : RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
4725 47 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
4726 : }
4727 59 : }
4728 :
4729 : static uint32_t
4730 36 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
4731 : {
4732 36 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
4733 : const struct spdk_nvme_ctrlr_data *cdata;
4734 36 : uint32_t nsid, ns_count = 0;
4735 :
4736 36 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4737 :
4738 36 : for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
4739 80 : nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
4740 44 : ns_count++;
4741 : }
4742 :
4743 36 : return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
4744 36 : sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
4745 : sizeof(uint32_t);
4746 : }
4747 :
4748 : static int
4749 6 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
4750 : void *cb_arg)
4751 : {
4752 6 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
4753 : struct nvme_ns *nvme_ns;
4754 : uint32_t i, nsid;
4755 :
4756 11 : for (i = 0; i < desc->num_of_nsid; i++) {
4757 5 : nsid = desc->nsid[i];
4758 5 : if (nsid == 0) {
4759 0 : continue;
4760 : }
4761 :
4762 5 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
4763 :
4764 5 : assert(nvme_ns != NULL);
4765 5 : if (nvme_ns == NULL) {
4766 : /* Target told us that an inactive namespace had an ANA change */
4767 0 : continue;
4768 : }
4769 :
4770 5 : _nvme_ns_set_ana_state(nvme_ns, desc);
4771 : }
4772 :
4773 6 : return 0;
4774 : }
4775 :
4776 : static void
4777 0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
4778 : {
4779 : struct nvme_ns *nvme_ns;
4780 :
4781 0 : spdk_free(nvme_ctrlr->ana_log_page);
4782 0 : nvme_ctrlr->ana_log_page = NULL;
4783 :
4784 0 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
4785 0 : nvme_ns != NULL;
4786 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
4787 0 : nvme_ns->ana_state_updating = false;
4788 0 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
4789 : }
4790 0 : }
4791 :
4792 : static void
4793 3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
4794 : {
4795 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
4796 :
4797 3 : if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
4798 3 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
4799 : nvme_ctrlr);
4800 : } else {
4801 0 : bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
4802 : }
4803 :
4804 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4805 :
4806 3 : assert(nvme_ctrlr->ana_log_page_updating == true);
4807 3 : nvme_ctrlr->ana_log_page_updating = false;
4808 :
4809 3 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
4810 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4811 :
4812 0 : nvme_ctrlr_unregister(nvme_ctrlr);
4813 : } else {
4814 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4815 :
4816 3 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
4817 : }
4818 3 : }
4819 :
4820 : static int
4821 6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
4822 : {
4823 : uint32_t ana_log_page_size;
4824 : int rc;
4825 :
4826 6 : if (nvme_ctrlr->ana_log_page == NULL) {
4827 0 : return -EINVAL;
4828 : }
4829 :
4830 6 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
4831 :
4832 6 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
4833 0 : SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
4834 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
4835 0 : return -EINVAL;
4836 : }
4837 :
4838 6 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4839 6 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
4840 : nvme_ctrlr->ana_log_page_updating) {
4841 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4842 3 : return -EBUSY;
4843 : }
4844 :
4845 3 : nvme_ctrlr->ana_log_page_updating = true;
4846 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4847 :
4848 3 : rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
4849 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
4850 : SPDK_NVME_GLOBAL_NS_TAG,
4851 3 : nvme_ctrlr->ana_log_page,
4852 : ana_log_page_size, 0,
4853 : nvme_ctrlr_read_ana_log_page_done,
4854 : nvme_ctrlr);
4855 3 : if (rc != 0) {
4856 0 : nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
4857 : }
4858 :
4859 3 : return rc;
4860 : }
4861 :
4862 : static void
4863 0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
4864 : {
4865 0 : }
4866 :
4867 : struct bdev_nvme_set_preferred_path_ctx {
4868 : struct spdk_bdev_desc *desc;
4869 : struct nvme_ns *nvme_ns;
4870 : bdev_nvme_set_preferred_path_cb cb_fn;
4871 : void *cb_arg;
4872 : };
4873 :
4874 : static void
4875 3 : bdev_nvme_set_preferred_path_done(struct spdk_io_channel_iter *i, int status)
4876 : {
4877 3 : struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
4878 :
4879 3 : assert(ctx != NULL);
4880 3 : assert(ctx->desc != NULL);
4881 3 : assert(ctx->cb_fn != NULL);
4882 :
4883 3 : spdk_bdev_close(ctx->desc);
4884 :
4885 3 : ctx->cb_fn(ctx->cb_arg, status);
4886 :
4887 3 : free(ctx);
4888 3 : }
4889 :
4890 : static void
4891 2 : _bdev_nvme_set_preferred_path(struct spdk_io_channel_iter *i)
4892 : {
4893 2 : struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
4894 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4895 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4896 : struct nvme_io_path *io_path, *prev;
4897 :
4898 2 : prev = NULL;
4899 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
4900 3 : if (io_path->nvme_ns == ctx->nvme_ns) {
4901 2 : break;
4902 : }
4903 1 : prev = io_path;
4904 : }
4905 :
4906 2 : if (io_path != NULL) {
4907 2 : if (prev != NULL) {
4908 1 : STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
4909 1 : STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
4910 : }
4911 :
4912 : /* We can set io_path to nbdev_ch->current_io_path directly here.
4913 : * However, it needs to be conditional. To simplify the code,
4914 : * just clear nbdev_ch->current_io_path and let find_io_path()
4915 : * fill it.
4916 : *
4917 : * Automatic failback may be disabled. Hence even if the io_path is
4918 : * already at the head, clear nbdev_ch->current_io_path.
4919 : */
4920 2 : bdev_nvme_clear_current_io_path(nbdev_ch);
4921 : }
4922 :
4923 2 : spdk_for_each_channel_continue(i, 0);
4924 2 : }
4925 :
4926 : static struct nvme_ns *
4927 3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
4928 : {
4929 : struct nvme_ns *nvme_ns, *prev;
4930 : const struct spdk_nvme_ctrlr_data *cdata;
4931 :
4932 3 : prev = NULL;
4933 6 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
4934 6 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
4935 :
4936 6 : if (cdata->cntlid == cntlid) {
4937 3 : break;
4938 : }
4939 3 : prev = nvme_ns;
4940 : }
4941 :
4942 3 : if (nvme_ns != NULL && prev != NULL) {
4943 2 : TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
4944 2 : TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
4945 : }
4946 :
4947 3 : return nvme_ns;
4948 : }
4949 :
4950 : /* This function supports only multipath mode. There is only a single I/O path
4951 : * for each NVMe-oF controller. Hence, just move the matched I/O path to the
4952 : * head of the I/O path list for each NVMe bdev channel.
4953 : *
4954 : * NVMe bdev channel may be acquired after completing this function. move the
4955 : * matched namespace to the head of the namespace list for the NVMe bdev too.
4956 : */
4957 : void
4958 3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
4959 : bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
4960 : {
4961 : struct bdev_nvme_set_preferred_path_ctx *ctx;
4962 : struct spdk_bdev *bdev;
4963 : struct nvme_bdev *nbdev;
4964 3 : int rc = 0;
4965 :
4966 3 : assert(cb_fn != NULL);
4967 :
4968 3 : ctx = calloc(1, sizeof(*ctx));
4969 3 : if (ctx == NULL) {
4970 0 : SPDK_ERRLOG("Failed to alloc context.\n");
4971 0 : rc = -ENOMEM;
4972 0 : goto err_alloc;
4973 : }
4974 :
4975 3 : ctx->cb_fn = cb_fn;
4976 3 : ctx->cb_arg = cb_arg;
4977 :
4978 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
4979 3 : if (rc != 0) {
4980 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
4981 0 : goto err_open;
4982 : }
4983 :
4984 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
4985 :
4986 3 : if (bdev->module != &nvme_if) {
4987 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
4988 0 : rc = -ENODEV;
4989 0 : goto err_bdev;
4990 : }
4991 :
4992 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
4993 :
4994 3 : pthread_mutex_lock(&nbdev->mutex);
4995 :
4996 3 : ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
4997 3 : if (ctx->nvme_ns == NULL) {
4998 0 : pthread_mutex_unlock(&nbdev->mutex);
4999 :
5000 0 : SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
5001 0 : rc = -ENODEV;
5002 0 : goto err_bdev;
5003 : }
5004 :
5005 3 : pthread_mutex_unlock(&nbdev->mutex);
5006 :
5007 3 : spdk_for_each_channel(nbdev,
5008 : _bdev_nvme_set_preferred_path,
5009 : ctx,
5010 : bdev_nvme_set_preferred_path_done);
5011 3 : return;
5012 :
5013 0 : err_bdev:
5014 0 : spdk_bdev_close(ctx->desc);
5015 0 : err_open:
5016 0 : free(ctx);
5017 0 : err_alloc:
5018 0 : cb_fn(cb_arg, rc);
5019 : }
5020 :
5021 : struct bdev_nvme_set_multipath_policy_ctx {
5022 : struct spdk_bdev_desc *desc;
5023 : bdev_nvme_set_multipath_policy_cb cb_fn;
5024 : void *cb_arg;
5025 : };
5026 :
5027 : static void
5028 3 : bdev_nvme_set_multipath_policy_done(struct spdk_io_channel_iter *i, int status)
5029 : {
5030 3 : struct bdev_nvme_set_multipath_policy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
5031 :
5032 3 : assert(ctx != NULL);
5033 3 : assert(ctx->desc != NULL);
5034 3 : assert(ctx->cb_fn != NULL);
5035 :
5036 3 : spdk_bdev_close(ctx->desc);
5037 :
5038 3 : ctx->cb_fn(ctx->cb_arg, status);
5039 :
5040 3 : free(ctx);
5041 3 : }
5042 :
5043 : static void
5044 1 : _bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i)
5045 : {
5046 1 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
5047 1 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
5048 1 : struct nvme_bdev *nbdev = spdk_io_channel_get_io_device(_ch);
5049 :
5050 1 : nbdev_ch->mp_policy = nbdev->mp_policy;
5051 1 : nbdev_ch->mp_selector = nbdev->mp_selector;
5052 1 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
5053 1 : bdev_nvme_clear_current_io_path(nbdev_ch);
5054 :
5055 1 : spdk_for_each_channel_continue(i, 0);
5056 1 : }
5057 :
5058 : void
5059 3 : bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy policy,
5060 : enum bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
5061 : bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
5062 : {
5063 : struct bdev_nvme_set_multipath_policy_ctx *ctx;
5064 : struct spdk_bdev *bdev;
5065 : struct nvme_bdev *nbdev;
5066 : int rc;
5067 :
5068 3 : assert(cb_fn != NULL);
5069 :
5070 3 : if (policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE && selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
5071 1 : if (rr_min_io == UINT32_MAX) {
5072 0 : rr_min_io = 1;
5073 1 : } else if (rr_min_io == 0) {
5074 0 : rc = -EINVAL;
5075 0 : goto exit;
5076 : }
5077 2 : } else if (rr_min_io != UINT32_MAX) {
5078 0 : rc = -EINVAL;
5079 0 : goto exit;
5080 : }
5081 :
5082 3 : ctx = calloc(1, sizeof(*ctx));
5083 3 : if (ctx == NULL) {
5084 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5085 0 : rc = -ENOMEM;
5086 0 : goto exit;
5087 : }
5088 :
5089 3 : ctx->cb_fn = cb_fn;
5090 3 : ctx->cb_arg = cb_arg;
5091 :
5092 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5093 3 : if (rc != 0) {
5094 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5095 0 : rc = -ENODEV;
5096 0 : goto err_open;
5097 : }
5098 :
5099 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5100 3 : if (bdev->module != &nvme_if) {
5101 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5102 0 : rc = -ENODEV;
5103 0 : goto err_module;
5104 : }
5105 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5106 :
5107 3 : pthread_mutex_lock(&nbdev->mutex);
5108 3 : nbdev->mp_policy = policy;
5109 3 : nbdev->mp_selector = selector;
5110 3 : nbdev->rr_min_io = rr_min_io;
5111 3 : pthread_mutex_unlock(&nbdev->mutex);
5112 :
5113 3 : spdk_for_each_channel(nbdev,
5114 : _bdev_nvme_set_multipath_policy,
5115 : ctx,
5116 : bdev_nvme_set_multipath_policy_done);
5117 3 : return;
5118 :
5119 0 : err_module:
5120 0 : spdk_bdev_close(ctx->desc);
5121 0 : err_open:
5122 0 : free(ctx);
5123 0 : exit:
5124 0 : cb_fn(cb_arg, rc);
5125 : }
5126 :
5127 : static void
5128 3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
5129 : {
5130 3 : struct nvme_ctrlr *nvme_ctrlr = arg;
5131 : union spdk_nvme_async_event_completion event;
5132 :
5133 3 : if (spdk_nvme_cpl_is_error(cpl)) {
5134 0 : SPDK_WARNLOG("AER request execute failed\n");
5135 0 : return;
5136 : }
5137 :
5138 3 : event.raw = cpl->cdw0;
5139 3 : if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5140 3 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
5141 2 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
5142 1 : } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5143 1 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
5144 1 : nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
5145 : }
5146 : }
5147 :
5148 : static void
5149 51 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
5150 : {
5151 51 : if (ctx->cb_fn) {
5152 51 : ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
5153 : }
5154 :
5155 51 : ctx->namespaces_populated = true;
5156 51 : if (ctx->probe_done) {
5157 : /* The probe was already completed, so we need to free the context
5158 : * here. This can happen for cases like OCSSD, where we need to
5159 : * send additional commands to the SSD after attach.
5160 : */
5161 31 : free(ctx);
5162 : }
5163 51 : }
5164 :
5165 : static void
5166 59 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
5167 : struct nvme_async_probe_ctx *ctx)
5168 : {
5169 59 : spdk_io_device_register(nvme_ctrlr,
5170 : bdev_nvme_create_ctrlr_channel_cb,
5171 : bdev_nvme_destroy_ctrlr_channel_cb,
5172 : sizeof(struct nvme_ctrlr_channel),
5173 59 : nvme_ctrlr->nbdev_ctrlr->name);
5174 :
5175 59 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
5176 59 : }
5177 :
5178 : static void
5179 30 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
5180 : {
5181 30 : struct nvme_ctrlr *nvme_ctrlr = _ctx;
5182 30 : struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
5183 :
5184 30 : nvme_ctrlr->probe_ctx = NULL;
5185 :
5186 30 : if (spdk_nvme_cpl_is_error(cpl)) {
5187 0 : nvme_ctrlr_delete(nvme_ctrlr);
5188 :
5189 0 : if (ctx != NULL) {
5190 0 : ctx->reported_bdevs = 0;
5191 0 : populate_namespaces_cb(ctx, -1);
5192 : }
5193 0 : return;
5194 : }
5195 :
5196 30 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5197 : }
5198 :
5199 : static int
5200 30 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
5201 : struct nvme_async_probe_ctx *ctx)
5202 : {
5203 30 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5204 : const struct spdk_nvme_ctrlr_data *cdata;
5205 : uint32_t ana_log_page_size;
5206 :
5207 30 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5208 :
5209 : /* Set buffer size enough to include maximum number of allowed namespaces. */
5210 30 : ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
5211 30 : sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
5212 : sizeof(uint32_t);
5213 :
5214 30 : nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
5215 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5216 30 : if (nvme_ctrlr->ana_log_page == NULL) {
5217 0 : SPDK_ERRLOG("could not allocate ANA log page buffer\n");
5218 0 : return -ENXIO;
5219 : }
5220 :
5221 : /* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned.
5222 : * Hence copy each descriptor to a temporary area when parsing it.
5223 : *
5224 : * Allocate a buffer whose size is as large as ANA log page buffer because
5225 : * we do not know the size of a descriptor until actually reading it.
5226 : */
5227 30 : nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
5228 30 : if (nvme_ctrlr->copied_ana_desc == NULL) {
5229 0 : SPDK_ERRLOG("could not allocate a buffer to parse ANA descriptor\n");
5230 0 : return -ENOMEM;
5231 : }
5232 :
5233 30 : nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
5234 :
5235 30 : nvme_ctrlr->probe_ctx = ctx;
5236 :
5237 : /* Then, set the read size only to include the current active namespaces. */
5238 30 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
5239 :
5240 30 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
5241 0 : SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
5242 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
5243 0 : return -EINVAL;
5244 : }
5245 :
5246 30 : return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
5247 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
5248 : SPDK_NVME_GLOBAL_NS_TAG,
5249 30 : nvme_ctrlr->ana_log_page,
5250 : ana_log_page_size, 0,
5251 : nvme_ctrlr_init_ana_log_page_done,
5252 : nvme_ctrlr);
5253 : }
5254 :
5255 : /* hostnqn and subnqn were already verified before attaching a controller.
5256 : * Hence check only the multipath capability and cntlid here.
5257 : */
5258 : static bool
5259 16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
5260 : {
5261 : struct nvme_ctrlr *tmp;
5262 : const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
5263 :
5264 16 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5265 :
5266 16 : if (!cdata->cmic.multi_ctrlr) {
5267 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5268 0 : return false;
5269 : }
5270 :
5271 33 : TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
5272 18 : tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
5273 :
5274 18 : if (!tmp_cdata->cmic.multi_ctrlr) {
5275 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5276 0 : return false;
5277 : }
5278 18 : if (cdata->cntlid == tmp_cdata->cntlid) {
5279 1 : SPDK_ERRLOG("cntlid %u are duplicated.\n", tmp_cdata->cntlid);
5280 1 : return false;
5281 : }
5282 : }
5283 :
5284 15 : return true;
5285 : }
5286 :
5287 : static int
5288 60 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
5289 : {
5290 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
5291 60 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5292 60 : int rc = 0;
5293 :
5294 60 : pthread_mutex_lock(&g_bdev_nvme_mutex);
5295 :
5296 60 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
5297 60 : if (nbdev_ctrlr != NULL) {
5298 16 : if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
5299 1 : rc = -EINVAL;
5300 1 : goto exit;
5301 : }
5302 : } else {
5303 44 : nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
5304 44 : if (nbdev_ctrlr == NULL) {
5305 0 : SPDK_ERRLOG("Failed to allocate nvme_bdev_ctrlr.\n");
5306 0 : rc = -ENOMEM;
5307 0 : goto exit;
5308 : }
5309 44 : nbdev_ctrlr->name = strdup(name);
5310 44 : if (nbdev_ctrlr->name == NULL) {
5311 0 : SPDK_ERRLOG("Failed to allocate name of nvme_bdev_ctrlr.\n");
5312 0 : free(nbdev_ctrlr);
5313 0 : goto exit;
5314 : }
5315 44 : TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
5316 44 : TAILQ_INIT(&nbdev_ctrlr->bdevs);
5317 44 : TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
5318 : }
5319 59 : nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
5320 59 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
5321 60 : exit:
5322 60 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
5323 60 : return rc;
5324 : }
5325 :
5326 : static int
5327 60 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
5328 : const char *name,
5329 : const struct spdk_nvme_transport_id *trid,
5330 : struct nvme_async_probe_ctx *ctx)
5331 : {
5332 : struct nvme_ctrlr *nvme_ctrlr;
5333 : struct nvme_path_id *path_id;
5334 : const struct spdk_nvme_ctrlr_data *cdata;
5335 : int rc;
5336 :
5337 60 : nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
5338 60 : if (nvme_ctrlr == NULL) {
5339 0 : SPDK_ERRLOG("Failed to allocate device struct\n");
5340 0 : return -ENOMEM;
5341 : }
5342 :
5343 60 : rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
5344 60 : if (rc != 0) {
5345 0 : free(nvme_ctrlr);
5346 0 : return rc;
5347 : }
5348 :
5349 60 : TAILQ_INIT(&nvme_ctrlr->trids);
5350 :
5351 60 : RB_INIT(&nvme_ctrlr->namespaces);
5352 :
5353 60 : path_id = calloc(1, sizeof(*path_id));
5354 60 : if (path_id == NULL) {
5355 0 : SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
5356 0 : rc = -ENOMEM;
5357 0 : goto err;
5358 : }
5359 :
5360 60 : path_id->trid = *trid;
5361 60 : if (ctx != NULL) {
5362 46 : memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
5363 46 : memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
5364 : }
5365 60 : nvme_ctrlr->active_path_id = path_id;
5366 60 : TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
5367 :
5368 60 : nvme_ctrlr->thread = spdk_get_thread();
5369 60 : nvme_ctrlr->ctrlr = ctrlr;
5370 60 : nvme_ctrlr->ref = 1;
5371 :
5372 60 : if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
5373 0 : SPDK_ERRLOG("OCSSDs are not supported");
5374 0 : rc = -ENOTSUP;
5375 0 : goto err;
5376 : }
5377 :
5378 60 : if (ctx != NULL) {
5379 46 : memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
5380 : } else {
5381 14 : bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
5382 : }
5383 :
5384 60 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
5385 : g_opts.nvme_adminq_poll_period_us);
5386 :
5387 60 : if (g_opts.timeout_us > 0) {
5388 : /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
5389 : /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
5390 0 : uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
5391 0 : g_opts.timeout_us : g_opts.timeout_admin_us;
5392 0 : spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
5393 : adm_timeout_us, timeout_cb, nvme_ctrlr);
5394 : }
5395 :
5396 60 : spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
5397 60 : spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
5398 :
5399 60 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
5400 : SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
5401 0 : nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
5402 : }
5403 :
5404 60 : rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
5405 60 : if (rc != 0) {
5406 1 : goto err;
5407 : }
5408 :
5409 59 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5410 :
5411 59 : if (cdata->cmic.ana_reporting) {
5412 30 : rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
5413 30 : if (rc == 0) {
5414 30 : return 0;
5415 : }
5416 : } else {
5417 29 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5418 29 : return 0;
5419 : }
5420 :
5421 1 : err:
5422 1 : nvme_ctrlr_delete(nvme_ctrlr);
5423 1 : return rc;
5424 : }
5425 :
5426 : void
5427 56 : bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts)
5428 : {
5429 56 : opts->prchk_flags = 0;
5430 56 : opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
5431 56 : opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
5432 56 : opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
5433 56 : }
5434 :
5435 : static void
5436 0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
5437 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
5438 : {
5439 : char *name;
5440 :
5441 0 : name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
5442 0 : if (!name) {
5443 0 : SPDK_ERRLOG("Failed to assign name to NVMe device\n");
5444 0 : return;
5445 : }
5446 :
5447 0 : if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
5448 0 : SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
5449 : } else {
5450 0 : SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
5451 : }
5452 :
5453 0 : free(name);
5454 : }
5455 :
5456 : static void
5457 59 : _nvme_ctrlr_destruct(void *ctx)
5458 : {
5459 59 : struct nvme_ctrlr *nvme_ctrlr = ctx;
5460 :
5461 59 : nvme_ctrlr_depopulate_namespaces(nvme_ctrlr);
5462 59 : nvme_ctrlr_release(nvme_ctrlr);
5463 59 : }
5464 :
5465 : static int
5466 56 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
5467 : {
5468 : struct nvme_probe_skip_entry *entry;
5469 :
5470 : /* The controller's destruction was already started */
5471 56 : if (nvme_ctrlr->destruct) {
5472 0 : return -EALREADY;
5473 : }
5474 :
5475 56 : if (!hotplug &&
5476 56 : nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
5477 0 : entry = calloc(1, sizeof(*entry));
5478 0 : if (!entry) {
5479 0 : return -ENOMEM;
5480 : }
5481 0 : entry->trid = nvme_ctrlr->active_path_id->trid;
5482 0 : TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
5483 : }
5484 :
5485 56 : nvme_ctrlr->destruct = true;
5486 56 : return 0;
5487 : }
5488 :
5489 : static int
5490 2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
5491 : {
5492 : int rc;
5493 :
5494 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5495 2 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
5496 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5497 :
5498 2 : if (rc == 0) {
5499 2 : _nvme_ctrlr_destruct(nvme_ctrlr);
5500 0 : } else if (rc == -EALREADY) {
5501 0 : rc = 0;
5502 : }
5503 :
5504 2 : return rc;
5505 : }
5506 :
5507 : static void
5508 0 : remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
5509 : {
5510 0 : struct nvme_ctrlr *nvme_ctrlr = cb_ctx;
5511 :
5512 0 : bdev_nvme_delete_ctrlr(nvme_ctrlr, true);
5513 0 : }
5514 :
5515 : static int
5516 0 : bdev_nvme_hotplug_probe(void *arg)
5517 : {
5518 0 : if (g_hotplug_probe_ctx == NULL) {
5519 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
5520 0 : return SPDK_POLLER_IDLE;
5521 : }
5522 :
5523 0 : if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
5524 0 : g_hotplug_probe_ctx = NULL;
5525 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
5526 : }
5527 :
5528 0 : return SPDK_POLLER_BUSY;
5529 : }
5530 :
5531 : static int
5532 0 : bdev_nvme_hotplug(void *arg)
5533 : {
5534 0 : struct spdk_nvme_transport_id trid_pcie;
5535 :
5536 0 : if (g_hotplug_probe_ctx) {
5537 0 : return SPDK_POLLER_BUSY;
5538 : }
5539 :
5540 0 : memset(&trid_pcie, 0, sizeof(trid_pcie));
5541 0 : spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
5542 :
5543 0 : g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
5544 : hotplug_probe_cb, attach_cb, NULL);
5545 :
5546 0 : if (g_hotplug_probe_ctx) {
5547 0 : assert(g_hotplug_probe_poller == NULL);
5548 0 : g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
5549 : }
5550 :
5551 0 : return SPDK_POLLER_BUSY;
5552 : }
5553 :
5554 : void
5555 0 : bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
5556 : {
5557 0 : *opts = g_opts;
5558 0 : }
5559 :
5560 : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
5561 : uint32_t reconnect_delay_sec,
5562 : uint32_t fast_io_fail_timeout_sec);
5563 :
5564 : static int
5565 0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
5566 : {
5567 0 : if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
5568 : /* Can't set timeout_admin_us without also setting timeout_us */
5569 0 : SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
5570 0 : return -EINVAL;
5571 : }
5572 :
5573 0 : if (opts->bdev_retry_count < -1) {
5574 0 : SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
5575 0 : return -EINVAL;
5576 : }
5577 :
5578 0 : if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
5579 0 : opts->reconnect_delay_sec,
5580 0 : opts->fast_io_fail_timeout_sec)) {
5581 0 : return -EINVAL;
5582 : }
5583 :
5584 0 : return 0;
5585 : }
5586 :
5587 : int
5588 0 : bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
5589 : {
5590 : int ret;
5591 :
5592 0 : ret = bdev_nvme_validate_opts(opts);
5593 0 : if (ret) {
5594 0 : SPDK_WARNLOG("Failed to set nvme opts.\n");
5595 0 : return ret;
5596 : }
5597 :
5598 0 : if (g_bdev_nvme_init_thread != NULL) {
5599 0 : if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
5600 0 : return -EPERM;
5601 : }
5602 : }
5603 :
5604 0 : if (opts->rdma_srq_size != 0 ||
5605 0 : opts->rdma_max_cq_size != 0) {
5606 0 : struct spdk_nvme_transport_opts drv_opts;
5607 :
5608 0 : spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
5609 0 : if (opts->rdma_srq_size != 0) {
5610 0 : drv_opts.rdma_srq_size = opts->rdma_srq_size;
5611 : }
5612 0 : if (opts->rdma_max_cq_size != 0) {
5613 0 : drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
5614 : }
5615 :
5616 0 : ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
5617 0 : if (ret) {
5618 0 : SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
5619 0 : return ret;
5620 : }
5621 : }
5622 :
5623 0 : g_opts = *opts;
5624 :
5625 0 : return 0;
5626 : }
5627 :
5628 : struct set_nvme_hotplug_ctx {
5629 : uint64_t period_us;
5630 : bool enabled;
5631 : spdk_msg_fn fn;
5632 : void *fn_ctx;
5633 : };
5634 :
5635 : static void
5636 0 : set_nvme_hotplug_period_cb(void *_ctx)
5637 : {
5638 0 : struct set_nvme_hotplug_ctx *ctx = _ctx;
5639 :
5640 0 : spdk_poller_unregister(&g_hotplug_poller);
5641 0 : if (ctx->enabled) {
5642 0 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
5643 : }
5644 :
5645 0 : g_nvme_hotplug_poll_period_us = ctx->period_us;
5646 0 : g_nvme_hotplug_enabled = ctx->enabled;
5647 0 : if (ctx->fn) {
5648 0 : ctx->fn(ctx->fn_ctx);
5649 : }
5650 :
5651 0 : free(ctx);
5652 0 : }
5653 :
5654 : int
5655 0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
5656 : {
5657 : struct set_nvme_hotplug_ctx *ctx;
5658 :
5659 0 : if (enabled == true && !spdk_process_is_primary()) {
5660 0 : return -EPERM;
5661 : }
5662 :
5663 0 : ctx = calloc(1, sizeof(*ctx));
5664 0 : if (ctx == NULL) {
5665 0 : return -ENOMEM;
5666 : }
5667 :
5668 0 : period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
5669 0 : ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
5670 0 : ctx->enabled = enabled;
5671 0 : ctx->fn = cb;
5672 0 : ctx->fn_ctx = cb_ctx;
5673 :
5674 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
5675 0 : return 0;
5676 : }
5677 :
5678 : static void
5679 45 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
5680 : struct nvme_async_probe_ctx *ctx)
5681 : {
5682 : struct nvme_ns *nvme_ns;
5683 : struct nvme_bdev *nvme_bdev;
5684 : size_t j;
5685 :
5686 45 : assert(nvme_ctrlr != NULL);
5687 :
5688 45 : if (ctx->names == NULL) {
5689 0 : ctx->reported_bdevs = 0;
5690 0 : populate_namespaces_cb(ctx, 0);
5691 0 : return;
5692 : }
5693 :
5694 : /*
5695 : * Report the new bdevs that were created in this call.
5696 : * There can be more than one bdev per NVMe controller.
5697 : */
5698 45 : j = 0;
5699 45 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5700 92 : while (nvme_ns != NULL) {
5701 47 : nvme_bdev = nvme_ns->bdev;
5702 47 : if (j < ctx->max_bdevs) {
5703 47 : ctx->names[j] = nvme_bdev->disk.name;
5704 47 : j++;
5705 : } else {
5706 0 : SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n",
5707 : ctx->max_bdevs);
5708 0 : ctx->reported_bdevs = 0;
5709 0 : populate_namespaces_cb(ctx, -ERANGE);
5710 0 : return;
5711 : }
5712 :
5713 47 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5714 : }
5715 :
5716 45 : ctx->reported_bdevs = j;
5717 45 : populate_namespaces_cb(ctx, 0);
5718 : }
5719 :
5720 : static int
5721 9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5722 : struct spdk_nvme_ctrlr *new_ctrlr,
5723 : struct spdk_nvme_transport_id *trid)
5724 : {
5725 : struct nvme_path_id *tmp_trid;
5726 :
5727 9 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
5728 0 : SPDK_ERRLOG("PCIe failover is not supported.\n");
5729 0 : return -ENOTSUP;
5730 : }
5731 :
5732 : /* Currently we only support failover to the same transport type. */
5733 9 : if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
5734 0 : SPDK_WARNLOG("Failover from trtype: %s to a different trtype: %s is not supported currently\n",
5735 : spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
5736 : spdk_nvme_transport_id_trtype_str(trid->trtype));
5737 0 : return -EINVAL;
5738 : }
5739 :
5740 :
5741 : /* Currently we only support failover to the same NQN. */
5742 9 : if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
5743 0 : SPDK_WARNLOG("Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
5744 : nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
5745 0 : return -EINVAL;
5746 : }
5747 :
5748 : /* Skip all the other checks if we've already registered this path. */
5749 21 : TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
5750 12 : if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
5751 0 : SPDK_WARNLOG("This path (traddr: %s subnqn: %s) is already registered\n", trid->traddr,
5752 : trid->subnqn);
5753 0 : return -EEXIST;
5754 : }
5755 : }
5756 :
5757 9 : return 0;
5758 : }
5759 :
5760 : static int
5761 9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
5762 : struct spdk_nvme_ctrlr *new_ctrlr)
5763 : {
5764 : struct nvme_ns *nvme_ns;
5765 : struct spdk_nvme_ns *new_ns;
5766 :
5767 9 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5768 9 : while (nvme_ns != NULL) {
5769 0 : new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
5770 0 : assert(new_ns != NULL);
5771 :
5772 0 : if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
5773 0 : return -EINVAL;
5774 : }
5775 :
5776 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5777 : }
5778 :
5779 9 : return 0;
5780 : }
5781 :
5782 : static int
5783 9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5784 : struct spdk_nvme_transport_id *trid)
5785 : {
5786 : struct nvme_path_id *active_id, *new_trid, *tmp_trid;
5787 :
5788 9 : new_trid = calloc(1, sizeof(*new_trid));
5789 9 : if (new_trid == NULL) {
5790 0 : return -ENOMEM;
5791 : }
5792 9 : new_trid->trid = *trid;
5793 :
5794 9 : active_id = nvme_ctrlr->active_path_id;
5795 9 : assert(active_id != NULL);
5796 9 : assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
5797 :
5798 : /* Skip the active trid not to replace it until it is failed. */
5799 9 : tmp_trid = TAILQ_NEXT(active_id, link);
5800 9 : if (tmp_trid == NULL) {
5801 6 : goto add_tail;
5802 : }
5803 :
5804 : /* It means the trid is faled if its last failed time is non-zero.
5805 : * Insert the new alternate trid before any failed trid.
5806 : */
5807 5 : TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
5808 3 : if (tmp_trid->last_failed_tsc != 0) {
5809 1 : TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
5810 1 : return 0;
5811 : }
5812 : }
5813 :
5814 2 : add_tail:
5815 8 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
5816 8 : return 0;
5817 : }
5818 :
5819 : /* This is the case that a secondary path is added to an existing
5820 : * nvme_ctrlr for failover. After checking if it can access the same
5821 : * namespaces as the primary path, it is disconnected until failover occurs.
5822 : */
5823 : static int
5824 9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5825 : struct spdk_nvme_ctrlr *new_ctrlr,
5826 : struct spdk_nvme_transport_id *trid)
5827 : {
5828 : int rc;
5829 :
5830 9 : assert(nvme_ctrlr != NULL);
5831 :
5832 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5833 :
5834 9 : rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
5835 9 : if (rc != 0) {
5836 0 : goto exit;
5837 : }
5838 :
5839 9 : rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
5840 9 : if (rc != 0) {
5841 0 : goto exit;
5842 : }
5843 :
5844 9 : rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
5845 :
5846 9 : exit:
5847 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5848 :
5849 9 : spdk_nvme_detach(new_ctrlr);
5850 :
5851 9 : return rc;
5852 : }
5853 :
5854 : static void
5855 46 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
5856 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
5857 : {
5858 46 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
5859 : struct nvme_async_probe_ctx *ctx;
5860 : int rc;
5861 :
5862 46 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
5863 46 : ctx->ctrlr_attached = true;
5864 :
5865 46 : rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
5866 46 : if (rc != 0) {
5867 1 : ctx->reported_bdevs = 0;
5868 1 : populate_namespaces_cb(ctx, rc);
5869 : }
5870 46 : }
5871 :
5872 : static void
5873 4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
5874 : struct spdk_nvme_ctrlr *ctrlr,
5875 : const struct spdk_nvme_ctrlr_opts *opts)
5876 : {
5877 4 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
5878 : struct nvme_ctrlr *nvme_ctrlr;
5879 : struct nvme_async_probe_ctx *ctx;
5880 : int rc;
5881 :
5882 4 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
5883 4 : ctx->ctrlr_attached = true;
5884 :
5885 4 : nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
5886 4 : if (nvme_ctrlr) {
5887 4 : rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
5888 : } else {
5889 0 : rc = -ENODEV;
5890 : }
5891 :
5892 4 : ctx->reported_bdevs = 0;
5893 4 : populate_namespaces_cb(ctx, rc);
5894 4 : }
5895 :
5896 : static int
5897 51 : bdev_nvme_async_poll(void *arg)
5898 : {
5899 51 : struct nvme_async_probe_ctx *ctx = arg;
5900 : int rc;
5901 :
5902 51 : rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
5903 51 : if (spdk_unlikely(rc != -EAGAIN)) {
5904 51 : ctx->probe_done = true;
5905 51 : spdk_poller_unregister(&ctx->poller);
5906 51 : if (!ctx->ctrlr_attached) {
5907 : /* The probe is done, but no controller was attached.
5908 : * That means we had a failure, so report -EIO back to
5909 : * the caller (usually the RPC). populate_namespaces_cb()
5910 : * will take care of freeing the nvme_async_probe_ctx.
5911 : */
5912 1 : ctx->reported_bdevs = 0;
5913 1 : populate_namespaces_cb(ctx, -EIO);
5914 50 : } else if (ctx->namespaces_populated) {
5915 : /* The namespaces for the attached controller were all
5916 : * populated and the response was already sent to the
5917 : * caller (usually the RPC). So free the context here.
5918 : */
5919 20 : free(ctx);
5920 : }
5921 : }
5922 :
5923 51 : return SPDK_POLLER_BUSY;
5924 : }
5925 :
5926 : static bool
5927 28 : bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
5928 : uint32_t reconnect_delay_sec,
5929 : uint32_t fast_io_fail_timeout_sec)
5930 : {
5931 28 : if (ctrlr_loss_timeout_sec < -1) {
5932 1 : SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
5933 1 : return false;
5934 27 : } else if (ctrlr_loss_timeout_sec == -1) {
5935 13 : if (reconnect_delay_sec == 0) {
5936 1 : SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
5937 1 : return false;
5938 12 : } else if (fast_io_fail_timeout_sec != 0 &&
5939 : fast_io_fail_timeout_sec < reconnect_delay_sec) {
5940 1 : SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io-fail_timeout_sec.\n");
5941 1 : return false;
5942 : }
5943 14 : } else if (ctrlr_loss_timeout_sec != 0) {
5944 11 : if (reconnect_delay_sec == 0) {
5945 1 : SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
5946 1 : return false;
5947 10 : } else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
5948 1 : SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
5949 1 : return false;
5950 9 : } else if (fast_io_fail_timeout_sec != 0) {
5951 6 : if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
5952 1 : SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
5953 1 : return false;
5954 5 : } else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
5955 1 : SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
5956 1 : return false;
5957 : }
5958 : }
5959 3 : } else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
5960 2 : SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
5961 2 : return false;
5962 : }
5963 :
5964 19 : return true;
5965 : }
5966 :
5967 : int
5968 51 : bdev_nvme_create(struct spdk_nvme_transport_id *trid,
5969 : const char *base_name,
5970 : const char **names,
5971 : uint32_t count,
5972 : spdk_bdev_create_nvme_fn cb_fn,
5973 : void *cb_ctx,
5974 : struct spdk_nvme_ctrlr_opts *drv_opts,
5975 : struct nvme_ctrlr_opts *bdev_opts,
5976 : bool multipath)
5977 : {
5978 : struct nvme_probe_skip_entry *entry, *tmp;
5979 : struct nvme_async_probe_ctx *ctx;
5980 : spdk_nvme_attach_cb attach_cb;
5981 :
5982 : /* TODO expand this check to include both the host and target TRIDs.
5983 : * Only if both are the same should we fail.
5984 : */
5985 51 : if (nvme_ctrlr_get(trid) != NULL) {
5986 0 : SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
5987 0 : return -EEXIST;
5988 : }
5989 :
5990 51 : if (bdev_opts != NULL &&
5991 9 : !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
5992 : bdev_opts->reconnect_delay_sec,
5993 : bdev_opts->fast_io_fail_timeout_sec)) {
5994 0 : return -EINVAL;
5995 : }
5996 :
5997 51 : ctx = calloc(1, sizeof(*ctx));
5998 51 : if (!ctx) {
5999 0 : return -ENOMEM;
6000 : }
6001 51 : ctx->base_name = base_name;
6002 51 : ctx->names = names;
6003 51 : ctx->max_bdevs = count;
6004 51 : ctx->cb_fn = cb_fn;
6005 51 : ctx->cb_ctx = cb_ctx;
6006 51 : ctx->trid = *trid;
6007 :
6008 51 : if (bdev_opts) {
6009 9 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
6010 : } else {
6011 42 : bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
6012 : }
6013 :
6014 51 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
6015 0 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
6016 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
6017 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
6018 0 : free(entry);
6019 0 : break;
6020 : }
6021 : }
6022 : }
6023 :
6024 51 : if (drv_opts) {
6025 0 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
6026 : } else {
6027 51 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->drv_opts, sizeof(ctx->drv_opts));
6028 : }
6029 :
6030 51 : ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
6031 51 : ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
6032 51 : ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
6033 51 : ctx->drv_opts.disable_read_ana_log_page = true;
6034 51 : ctx->drv_opts.transport_tos = g_opts.transport_tos;
6035 :
6036 51 : if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || multipath) {
6037 47 : attach_cb = connect_attach_cb;
6038 : } else {
6039 4 : attach_cb = connect_set_failover_cb;
6040 : }
6041 :
6042 51 : ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
6043 51 : if (ctx->probe_ctx == NULL) {
6044 0 : SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
6045 0 : free(ctx);
6046 0 : return -ENODEV;
6047 : }
6048 51 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
6049 :
6050 51 : return 0;
6051 : }
6052 :
6053 : struct bdev_nvme_delete_ctx {
6054 : char *name;
6055 : struct nvme_path_id path_id;
6056 : bdev_nvme_delete_done_fn delete_done;
6057 : void *delete_done_ctx;
6058 : uint64_t timeout_ticks;
6059 : struct spdk_poller *poller;
6060 : };
6061 :
6062 : static void
6063 2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
6064 : {
6065 2 : if (ctx != NULL) {
6066 1 : free(ctx->name);
6067 1 : free(ctx);
6068 : }
6069 2 : }
6070 :
6071 : static bool
6072 74 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
6073 : {
6074 74 : if (path_id->trid.trtype != 0) {
6075 21 : if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
6076 0 : if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
6077 0 : return false;
6078 : }
6079 : } else {
6080 21 : if (path_id->trid.trtype != p->trid.trtype) {
6081 0 : return false;
6082 : }
6083 : }
6084 : }
6085 :
6086 74 : if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
6087 21 : if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
6088 11 : return false;
6089 : }
6090 : }
6091 :
6092 63 : if (path_id->trid.adrfam != 0) {
6093 0 : if (path_id->trid.adrfam != p->trid.adrfam) {
6094 0 : return false;
6095 : }
6096 : }
6097 :
6098 63 : if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
6099 10 : if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
6100 0 : return false;
6101 : }
6102 : }
6103 :
6104 63 : if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
6105 10 : if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
6106 0 : return false;
6107 : }
6108 : }
6109 :
6110 63 : if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
6111 0 : if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
6112 0 : return false;
6113 : }
6114 : }
6115 :
6116 63 : if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
6117 0 : if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
6118 0 : return false;
6119 : }
6120 : }
6121 :
6122 63 : return true;
6123 : }
6124 :
6125 : static bool
6126 2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
6127 : {
6128 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6129 : struct nvme_ctrlr *ctrlr;
6130 : struct nvme_path_id *p;
6131 :
6132 2 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6133 2 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6134 2 : if (!nbdev_ctrlr) {
6135 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6136 1 : return false;
6137 : }
6138 :
6139 1 : TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
6140 1 : pthread_mutex_lock(&ctrlr->mutex);
6141 1 : TAILQ_FOREACH(p, &ctrlr->trids, link) {
6142 1 : if (nvme_path_id_compare(p, path_id)) {
6143 1 : pthread_mutex_unlock(&ctrlr->mutex);
6144 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6145 1 : return true;
6146 : }
6147 : }
6148 0 : pthread_mutex_unlock(&ctrlr->mutex);
6149 : }
6150 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6151 :
6152 0 : return false;
6153 : }
6154 :
6155 : static int
6156 2 : bdev_nvme_delete_complete_poll(void *arg)
6157 : {
6158 2 : struct bdev_nvme_delete_ctx *ctx = arg;
6159 2 : int rc = 0;
6160 :
6161 2 : if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
6162 1 : if (ctx->timeout_ticks > spdk_get_ticks()) {
6163 1 : return SPDK_POLLER_BUSY;
6164 : }
6165 :
6166 0 : SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
6167 0 : rc = -ETIMEDOUT;
6168 : }
6169 :
6170 1 : spdk_poller_unregister(&ctx->poller);
6171 :
6172 1 : ctx->delete_done(ctx->delete_done_ctx, rc);
6173 1 : free_bdev_nvme_delete_ctx(ctx);
6174 :
6175 1 : return SPDK_POLLER_BUSY;
6176 : }
6177 :
6178 : static int
6179 63 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
6180 : {
6181 : struct nvme_path_id *p, *t;
6182 : spdk_msg_fn msg_fn;
6183 63 : int rc = -ENXIO;
6184 :
6185 63 : pthread_mutex_lock(&nvme_ctrlr->mutex);
6186 :
6187 73 : TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
6188 73 : if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
6189 63 : break;
6190 : }
6191 :
6192 10 : if (!nvme_path_id_compare(p, path_id)) {
6193 3 : continue;
6194 : }
6195 :
6196 : /* We are not using the specified path. */
6197 7 : TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
6198 7 : free(p);
6199 7 : rc = 0;
6200 : }
6201 :
6202 63 : if (p == NULL || !nvme_path_id_compare(p, path_id)) {
6203 8 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6204 8 : return rc;
6205 : }
6206 :
6207 : /* If we made it here, then this path is a match! Now we need to remove it. */
6208 :
6209 : /* This is the active path in use right now. The active path is always the first in the list. */
6210 55 : assert(p == nvme_ctrlr->active_path_id);
6211 :
6212 55 : if (!TAILQ_NEXT(p, link)) {
6213 : /* The current path is the only path. */
6214 54 : msg_fn = _nvme_ctrlr_destruct;
6215 54 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
6216 : } else {
6217 : /* There is an alternative path. */
6218 1 : msg_fn = _bdev_nvme_reset_ctrlr;
6219 1 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
6220 : }
6221 :
6222 55 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6223 :
6224 55 : if (rc == 0) {
6225 55 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
6226 0 : } else if (rc == -EALREADY) {
6227 0 : rc = 0;
6228 : }
6229 :
6230 55 : return rc;
6231 : }
6232 :
6233 : int
6234 48 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
6235 : bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
6236 : {
6237 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6238 : struct nvme_ctrlr *nvme_ctrlr, *tmp_nvme_ctrlr;
6239 48 : struct bdev_nvme_delete_ctx *ctx = NULL;
6240 48 : int rc = -ENXIO, _rc;
6241 :
6242 48 : if (name == NULL || path_id == NULL) {
6243 0 : rc = -EINVAL;
6244 0 : goto exit;
6245 : }
6246 :
6247 48 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6248 :
6249 48 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6250 48 : if (nbdev_ctrlr == NULL) {
6251 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6252 :
6253 0 : SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
6254 0 : rc = -ENODEV;
6255 0 : goto exit;
6256 : }
6257 :
6258 111 : TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
6259 63 : _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
6260 63 : if (_rc < 0 && _rc != -ENXIO) {
6261 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6262 0 : rc = _rc;
6263 0 : goto exit;
6264 63 : } else if (_rc == 0) {
6265 : /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
6266 : * was deleted successfully. To remember the successful deletion,
6267 : * overwrite rc only if _rc is zero.
6268 : */
6269 57 : rc = 0;
6270 : }
6271 : }
6272 :
6273 48 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6274 :
6275 48 : if (rc != 0 || delete_done == NULL) {
6276 47 : goto exit;
6277 : }
6278 :
6279 1 : ctx = calloc(1, sizeof(*ctx));
6280 1 : if (ctx == NULL) {
6281 0 : SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
6282 0 : rc = -ENOMEM;
6283 0 : goto exit;
6284 : }
6285 :
6286 1 : ctx->name = strdup(name);
6287 1 : if (ctx->name == NULL) {
6288 0 : SPDK_ERRLOG("Failed to copy controller name for deletion\n");
6289 0 : rc = -ENOMEM;
6290 0 : goto exit;
6291 : }
6292 :
6293 1 : ctx->delete_done = delete_done;
6294 1 : ctx->delete_done_ctx = delete_done_ctx;
6295 1 : ctx->path_id = *path_id;
6296 1 : ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
6297 1 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
6298 1 : if (ctx->poller == NULL) {
6299 0 : SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
6300 0 : rc = -ENOMEM;
6301 0 : goto exit;
6302 : }
6303 :
6304 1 : exit:
6305 48 : if (rc != 0) {
6306 1 : free_bdev_nvme_delete_ctx(ctx);
6307 : }
6308 :
6309 48 : return rc;
6310 : }
6311 :
6312 : #define DISCOVERY_INFOLOG(ctx, format, ...) \
6313 : SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
6314 :
6315 : #define DISCOVERY_ERRLOG(ctx, format, ...) \
6316 : SPDK_ERRLOG("Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
6317 :
6318 : struct discovery_entry_ctx {
6319 : char name[128];
6320 : struct spdk_nvme_transport_id trid;
6321 : struct spdk_nvme_ctrlr_opts drv_opts;
6322 : struct spdk_nvmf_discovery_log_page_entry entry;
6323 : TAILQ_ENTRY(discovery_entry_ctx) tailq;
6324 : struct discovery_ctx *ctx;
6325 : };
6326 :
6327 : struct discovery_ctx {
6328 : char *name;
6329 : spdk_bdev_nvme_start_discovery_fn start_cb_fn;
6330 : spdk_bdev_nvme_stop_discovery_fn stop_cb_fn;
6331 : void *cb_ctx;
6332 : struct spdk_nvme_probe_ctx *probe_ctx;
6333 : struct spdk_nvme_detach_ctx *detach_ctx;
6334 : struct spdk_nvme_ctrlr *ctrlr;
6335 : struct spdk_nvme_transport_id trid;
6336 : struct discovery_entry_ctx *entry_ctx_in_use;
6337 : struct spdk_poller *poller;
6338 : struct spdk_nvme_ctrlr_opts drv_opts;
6339 : struct nvme_ctrlr_opts bdev_opts;
6340 : struct spdk_nvmf_discovery_log_page *log_page;
6341 : TAILQ_ENTRY(discovery_ctx) tailq;
6342 : TAILQ_HEAD(, discovery_entry_ctx) nvm_entry_ctxs;
6343 : TAILQ_HEAD(, discovery_entry_ctx) discovery_entry_ctxs;
6344 : int rc;
6345 : bool wait_for_attach;
6346 : uint64_t timeout_ticks;
6347 : /* Denotes that the discovery service is being started. We're waiting
6348 : * for the initial connection to the discovery controller to be
6349 : * established and attach discovered NVM ctrlrs.
6350 : */
6351 : bool initializing;
6352 : /* Denotes if a discovery is currently in progress for this context.
6353 : * That includes connecting to newly discovered subsystems. Used to
6354 : * ensure we do not start a new discovery until an existing one is
6355 : * complete.
6356 : */
6357 : bool in_progress;
6358 :
6359 : /* Denotes if another discovery is needed after the one in progress
6360 : * completes. Set when we receive an AER completion while a discovery
6361 : * is already in progress.
6362 : */
6363 : bool pending;
6364 :
6365 : /* Signal to the discovery context poller that it should stop the
6366 : * discovery service, including detaching from the current discovery
6367 : * controller.
6368 : */
6369 : bool stop;
6370 :
6371 : struct spdk_thread *calling_thread;
6372 : uint32_t index;
6373 : uint32_t attach_in_progress;
6374 : char *hostnqn;
6375 :
6376 : /* Denotes if the discovery service was started by the mdns discovery.
6377 : */
6378 : bool from_mdns_discovery_service;
6379 : };
6380 :
6381 : TAILQ_HEAD(discovery_ctxs, discovery_ctx);
6382 : static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
6383 :
6384 : static void get_discovery_log_page(struct discovery_ctx *ctx);
6385 :
6386 : static void
6387 0 : free_discovery_ctx(struct discovery_ctx *ctx)
6388 : {
6389 0 : free(ctx->log_page);
6390 0 : free(ctx->hostnqn);
6391 0 : free(ctx->name);
6392 0 : free(ctx);
6393 0 : }
6394 :
6395 : static void
6396 0 : discovery_complete(struct discovery_ctx *ctx)
6397 : {
6398 0 : ctx->initializing = false;
6399 0 : ctx->in_progress = false;
6400 0 : if (ctx->pending) {
6401 0 : ctx->pending = false;
6402 0 : get_discovery_log_page(ctx);
6403 : }
6404 0 : }
6405 :
6406 : static void
6407 0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
6408 : struct spdk_nvmf_discovery_log_page_entry *entry)
6409 : {
6410 : char *space;
6411 :
6412 0 : trid->trtype = entry->trtype;
6413 0 : trid->adrfam = entry->adrfam;
6414 0 : memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
6415 0 : memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
6416 : /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
6417 : * before call to this function trid->subnqn is zeroed out, we need
6418 : * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
6419 : * remains 0. Then we can shorten the string (replace ' ' with 0) if required
6420 : */
6421 0 : memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
6422 :
6423 : /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
6424 : * But the log page entries typically pad them with spaces, not zeroes.
6425 : * So add a NULL terminator to each of these fields at the appropriate
6426 : * location.
6427 : */
6428 0 : space = strchr(trid->traddr, ' ');
6429 0 : if (space) {
6430 0 : *space = 0;
6431 : }
6432 0 : space = strchr(trid->trsvcid, ' ');
6433 0 : if (space) {
6434 0 : *space = 0;
6435 : }
6436 0 : space = strchr(trid->subnqn, ' ');
6437 0 : if (space) {
6438 0 : *space = 0;
6439 : }
6440 0 : }
6441 :
6442 : static void
6443 0 : _stop_discovery(void *_ctx)
6444 : {
6445 0 : struct discovery_ctx *ctx = _ctx;
6446 :
6447 0 : if (ctx->attach_in_progress > 0) {
6448 0 : spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
6449 0 : return;
6450 : }
6451 :
6452 0 : ctx->stop = true;
6453 :
6454 0 : while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
6455 : struct discovery_entry_ctx *entry_ctx;
6456 0 : struct nvme_path_id path = {};
6457 :
6458 0 : entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
6459 0 : path.trid = entry_ctx->trid;
6460 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
6461 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
6462 0 : free(entry_ctx);
6463 : }
6464 :
6465 0 : while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
6466 : struct discovery_entry_ctx *entry_ctx;
6467 :
6468 0 : entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
6469 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
6470 0 : free(entry_ctx);
6471 : }
6472 :
6473 0 : free(ctx->entry_ctx_in_use);
6474 0 : ctx->entry_ctx_in_use = NULL;
6475 : }
6476 :
6477 : static void
6478 0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
6479 : {
6480 0 : ctx->stop_cb_fn = cb_fn;
6481 0 : ctx->cb_ctx = cb_ctx;
6482 :
6483 0 : if (ctx->attach_in_progress > 0) {
6484 0 : DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
6485 : ctx->attach_in_progress);
6486 : }
6487 :
6488 0 : _stop_discovery(ctx);
6489 0 : }
6490 :
6491 : static void
6492 2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
6493 : {
6494 : struct discovery_ctx *d_ctx;
6495 : struct nvme_path_id *path_id;
6496 2 : struct spdk_nvme_transport_id trid = {};
6497 : struct discovery_entry_ctx *entry_ctx, *tmp;
6498 :
6499 2 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
6500 :
6501 2 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
6502 0 : TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
6503 0 : build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
6504 0 : if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
6505 0 : continue;
6506 : }
6507 :
6508 0 : TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
6509 0 : free(entry_ctx);
6510 0 : DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
6511 : trid.subnqn, trid.traddr, trid.trsvcid);
6512 :
6513 : /* Fail discovery ctrlr to force reattach attempt */
6514 0 : spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
6515 : }
6516 : }
6517 2 : }
6518 :
6519 : static void
6520 0 : discovery_remove_controllers(struct discovery_ctx *ctx)
6521 : {
6522 0 : struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
6523 : struct discovery_entry_ctx *entry_ctx, *tmp;
6524 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
6525 0 : struct spdk_nvme_transport_id old_trid = {};
6526 : uint64_t numrec, i;
6527 : bool found;
6528 :
6529 0 : numrec = from_le64(&log_page->numrec);
6530 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
6531 0 : found = false;
6532 0 : old_entry = &entry_ctx->entry;
6533 0 : build_trid_from_log_page_entry(&old_trid, old_entry);
6534 0 : for (i = 0; i < numrec; i++) {
6535 0 : new_entry = &log_page->entries[i];
6536 0 : if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
6537 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
6538 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
6539 0 : found = true;
6540 0 : break;
6541 : }
6542 : }
6543 0 : if (!found) {
6544 0 : struct nvme_path_id path = {};
6545 :
6546 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
6547 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
6548 :
6549 0 : path.trid = entry_ctx->trid;
6550 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
6551 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
6552 0 : free(entry_ctx);
6553 : }
6554 : }
6555 0 : free(log_page);
6556 0 : ctx->log_page = NULL;
6557 0 : discovery_complete(ctx);
6558 0 : }
6559 :
6560 : static void
6561 0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
6562 : {
6563 0 : ctx->timeout_ticks = 0;
6564 0 : ctx->rc = status;
6565 0 : if (ctx->start_cb_fn) {
6566 0 : ctx->start_cb_fn(ctx->cb_ctx, status);
6567 0 : ctx->start_cb_fn = NULL;
6568 0 : ctx->cb_ctx = NULL;
6569 : }
6570 0 : }
6571 :
6572 : static void
6573 0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
6574 : {
6575 0 : struct discovery_entry_ctx *entry_ctx = cb_ctx;
6576 0 : struct discovery_ctx *ctx = entry_ctx->ctx;
6577 :
6578 0 : DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
6579 0 : ctx->attach_in_progress--;
6580 0 : if (ctx->attach_in_progress == 0) {
6581 0 : complete_discovery_start(ctx, ctx->rc);
6582 0 : if (ctx->initializing && ctx->rc != 0) {
6583 0 : DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
6584 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
6585 : } else {
6586 0 : discovery_remove_controllers(ctx);
6587 : }
6588 : }
6589 0 : }
6590 :
6591 : static struct discovery_entry_ctx *
6592 0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
6593 : {
6594 : struct discovery_entry_ctx *new_ctx;
6595 :
6596 0 : new_ctx = calloc(1, sizeof(*new_ctx));
6597 0 : if (new_ctx == NULL) {
6598 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6599 0 : return NULL;
6600 : }
6601 :
6602 0 : new_ctx->ctx = ctx;
6603 0 : memcpy(&new_ctx->trid, trid, sizeof(*trid));
6604 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
6605 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
6606 0 : return new_ctx;
6607 : }
6608 :
6609 : static void
6610 0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
6611 : struct spdk_nvmf_discovery_log_page *log_page)
6612 : {
6613 0 : struct discovery_ctx *ctx = cb_arg;
6614 : struct discovery_entry_ctx *entry_ctx, *tmp;
6615 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
6616 : uint64_t numrec, i;
6617 : bool found;
6618 :
6619 0 : if (rc || spdk_nvme_cpl_is_error(cpl)) {
6620 0 : DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
6621 0 : return;
6622 : }
6623 :
6624 0 : ctx->log_page = log_page;
6625 0 : assert(ctx->attach_in_progress == 0);
6626 0 : numrec = from_le64(&log_page->numrec);
6627 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
6628 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
6629 0 : free(entry_ctx);
6630 : }
6631 0 : for (i = 0; i < numrec; i++) {
6632 0 : found = false;
6633 0 : new_entry = &log_page->entries[i];
6634 0 : if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
6635 0 : new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
6636 : struct discovery_entry_ctx *new_ctx;
6637 0 : struct spdk_nvme_transport_id trid = {};
6638 :
6639 0 : build_trid_from_log_page_entry(&trid, new_entry);
6640 0 : new_ctx = create_discovery_entry_ctx(ctx, &trid);
6641 0 : if (new_ctx == NULL) {
6642 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6643 0 : break;
6644 : }
6645 :
6646 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
6647 0 : continue;
6648 : }
6649 0 : TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
6650 0 : old_entry = &entry_ctx->entry;
6651 0 : if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
6652 0 : found = true;
6653 0 : break;
6654 : }
6655 : }
6656 0 : if (!found) {
6657 0 : struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
6658 : struct discovery_ctx *d_ctx;
6659 :
6660 0 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
6661 0 : TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
6662 0 : if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
6663 : sizeof(new_entry->subnqn))) {
6664 0 : break;
6665 : }
6666 : }
6667 0 : if (subnqn_ctx) {
6668 0 : break;
6669 : }
6670 : }
6671 :
6672 0 : new_ctx = calloc(1, sizeof(*new_ctx));
6673 0 : if (new_ctx == NULL) {
6674 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6675 0 : break;
6676 : }
6677 :
6678 0 : new_ctx->ctx = ctx;
6679 0 : memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
6680 0 : build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
6681 0 : if (subnqn_ctx) {
6682 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
6683 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
6684 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
6685 : new_ctx->name);
6686 : } else {
6687 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
6688 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
6689 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
6690 : new_ctx->name);
6691 : }
6692 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
6693 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
6694 0 : rc = bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
6695 : discovery_attach_controller_done, new_ctx,
6696 : &new_ctx->drv_opts, &ctx->bdev_opts, true);
6697 0 : if (rc == 0) {
6698 0 : TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
6699 0 : ctx->attach_in_progress++;
6700 : } else {
6701 0 : DISCOVERY_ERRLOG(ctx, "bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
6702 : }
6703 : }
6704 : }
6705 :
6706 0 : if (ctx->attach_in_progress == 0) {
6707 0 : discovery_remove_controllers(ctx);
6708 : }
6709 : }
6710 :
6711 : static void
6712 0 : get_discovery_log_page(struct discovery_ctx *ctx)
6713 : {
6714 : int rc;
6715 :
6716 0 : assert(ctx->in_progress == false);
6717 0 : ctx->in_progress = true;
6718 0 : rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
6719 0 : if (rc != 0) {
6720 0 : DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
6721 : }
6722 0 : DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
6723 0 : }
6724 :
6725 : static void
6726 0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
6727 : {
6728 0 : struct discovery_ctx *ctx = arg;
6729 0 : uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
6730 :
6731 0 : if (spdk_nvme_cpl_is_error(cpl)) {
6732 0 : DISCOVERY_ERRLOG(ctx, "aer failed\n");
6733 0 : return;
6734 : }
6735 :
6736 0 : if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
6737 0 : DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
6738 0 : return;
6739 : }
6740 :
6741 0 : DISCOVERY_INFOLOG(ctx, "got aer\n");
6742 0 : if (ctx->in_progress) {
6743 0 : ctx->pending = true;
6744 0 : return;
6745 : }
6746 :
6747 0 : get_discovery_log_page(ctx);
6748 : }
6749 :
6750 : static void
6751 0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6752 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
6753 : {
6754 0 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6755 : struct discovery_ctx *ctx;
6756 :
6757 0 : ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
6758 :
6759 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
6760 0 : ctx->probe_ctx = NULL;
6761 0 : ctx->ctrlr = ctrlr;
6762 :
6763 0 : if (ctx->rc != 0) {
6764 0 : DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
6765 : ctx->rc);
6766 0 : return;
6767 : }
6768 :
6769 0 : spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
6770 : }
6771 :
6772 : static int
6773 0 : discovery_poller(void *arg)
6774 : {
6775 0 : struct discovery_ctx *ctx = arg;
6776 : struct spdk_nvme_transport_id *trid;
6777 : int rc;
6778 :
6779 0 : if (ctx->detach_ctx) {
6780 0 : rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
6781 0 : if (rc != -EAGAIN) {
6782 0 : ctx->detach_ctx = NULL;
6783 0 : ctx->ctrlr = NULL;
6784 : }
6785 0 : } else if (ctx->stop) {
6786 0 : if (ctx->ctrlr != NULL) {
6787 0 : rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
6788 0 : if (rc == 0) {
6789 0 : return SPDK_POLLER_BUSY;
6790 : }
6791 0 : DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
6792 : }
6793 0 : spdk_poller_unregister(&ctx->poller);
6794 0 : TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
6795 0 : assert(ctx->start_cb_fn == NULL);
6796 0 : if (ctx->stop_cb_fn != NULL) {
6797 0 : ctx->stop_cb_fn(ctx->cb_ctx);
6798 : }
6799 0 : free_discovery_ctx(ctx);
6800 0 : } else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
6801 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
6802 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
6803 0 : assert(ctx->initializing);
6804 0 : spdk_poller_unregister(&ctx->poller);
6805 0 : TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
6806 0 : complete_discovery_start(ctx, -ETIMEDOUT);
6807 0 : stop_discovery(ctx, NULL, NULL);
6808 0 : free_discovery_ctx(ctx);
6809 0 : return SPDK_POLLER_BUSY;
6810 : }
6811 :
6812 0 : assert(ctx->entry_ctx_in_use == NULL);
6813 0 : ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
6814 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
6815 0 : trid = &ctx->entry_ctx_in_use->trid;
6816 0 : ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
6817 0 : if (ctx->probe_ctx) {
6818 0 : spdk_poller_unregister(&ctx->poller);
6819 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
6820 : } else {
6821 0 : DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
6822 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
6823 0 : ctx->entry_ctx_in_use = NULL;
6824 : }
6825 0 : } else if (ctx->probe_ctx) {
6826 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
6827 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
6828 0 : complete_discovery_start(ctx, -ETIMEDOUT);
6829 0 : return SPDK_POLLER_BUSY;
6830 : }
6831 :
6832 0 : rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
6833 0 : if (rc != -EAGAIN) {
6834 0 : if (ctx->rc != 0) {
6835 0 : assert(ctx->initializing);
6836 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
6837 : } else {
6838 0 : assert(rc == 0);
6839 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
6840 0 : ctx->rc = rc;
6841 0 : get_discovery_log_page(ctx);
6842 : }
6843 : }
6844 : } else {
6845 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
6846 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
6847 0 : complete_discovery_start(ctx, -ETIMEDOUT);
6848 : /* We need to wait until all NVM ctrlrs are attached before we stop the
6849 : * discovery service to make sure we don't detach a ctrlr that is still
6850 : * being attached.
6851 : */
6852 0 : if (ctx->attach_in_progress == 0) {
6853 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
6854 0 : return SPDK_POLLER_BUSY;
6855 : }
6856 : }
6857 :
6858 0 : rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
6859 0 : if (rc < 0) {
6860 0 : spdk_poller_unregister(&ctx->poller);
6861 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
6862 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
6863 0 : ctx->entry_ctx_in_use = NULL;
6864 :
6865 0 : rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
6866 0 : if (rc != 0) {
6867 0 : DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
6868 0 : ctx->ctrlr = NULL;
6869 : }
6870 : }
6871 : }
6872 :
6873 0 : return SPDK_POLLER_BUSY;
6874 : }
6875 :
6876 : static void
6877 0 : start_discovery_poller(void *arg)
6878 : {
6879 0 : struct discovery_ctx *ctx = arg;
6880 :
6881 0 : TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
6882 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
6883 0 : }
6884 :
6885 : int
6886 0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
6887 : const char *base_name,
6888 : struct spdk_nvme_ctrlr_opts *drv_opts,
6889 : struct nvme_ctrlr_opts *bdev_opts,
6890 : uint64_t attach_timeout,
6891 : bool from_mdns,
6892 : spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
6893 : {
6894 : struct discovery_ctx *ctx;
6895 : struct discovery_entry_ctx *discovery_entry_ctx;
6896 :
6897 0 : snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
6898 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
6899 0 : if (strcmp(ctx->name, base_name) == 0) {
6900 0 : return -EEXIST;
6901 : }
6902 :
6903 0 : if (ctx->entry_ctx_in_use != NULL) {
6904 0 : if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
6905 0 : return -EEXIST;
6906 : }
6907 : }
6908 :
6909 0 : TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
6910 0 : if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
6911 0 : return -EEXIST;
6912 : }
6913 : }
6914 : }
6915 :
6916 0 : ctx = calloc(1, sizeof(*ctx));
6917 0 : if (ctx == NULL) {
6918 0 : return -ENOMEM;
6919 : }
6920 :
6921 0 : ctx->name = strdup(base_name);
6922 0 : if (ctx->name == NULL) {
6923 0 : free_discovery_ctx(ctx);
6924 0 : return -ENOMEM;
6925 : }
6926 0 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
6927 0 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
6928 0 : ctx->from_mdns_discovery_service = from_mdns;
6929 0 : ctx->bdev_opts.from_discovery_service = true;
6930 0 : ctx->calling_thread = spdk_get_thread();
6931 0 : ctx->start_cb_fn = cb_fn;
6932 0 : ctx->cb_ctx = cb_ctx;
6933 0 : ctx->initializing = true;
6934 0 : if (ctx->start_cb_fn) {
6935 : /* We can use this when dumping json to denote if this RPC parameter
6936 : * was specified or not.
6937 : */
6938 0 : ctx->wait_for_attach = true;
6939 : }
6940 0 : if (attach_timeout != 0) {
6941 0 : ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
6942 0 : spdk_get_ticks_hz() / 1000ull;
6943 : }
6944 0 : TAILQ_INIT(&ctx->nvm_entry_ctxs);
6945 0 : TAILQ_INIT(&ctx->discovery_entry_ctxs);
6946 0 : memcpy(&ctx->trid, trid, sizeof(*trid));
6947 : /* Even if user did not specify hostnqn, we can still strdup("\0"); */
6948 0 : ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
6949 0 : if (ctx->hostnqn == NULL) {
6950 0 : free_discovery_ctx(ctx);
6951 0 : return -ENOMEM;
6952 : }
6953 0 : discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
6954 0 : if (discovery_entry_ctx == NULL) {
6955 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6956 0 : free_discovery_ctx(ctx);
6957 0 : return -ENOMEM;
6958 : }
6959 :
6960 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
6961 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
6962 0 : return 0;
6963 : }
6964 :
6965 : int
6966 0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
6967 : {
6968 : struct discovery_ctx *ctx;
6969 :
6970 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
6971 0 : if (strcmp(name, ctx->name) == 0) {
6972 0 : if (ctx->stop) {
6973 0 : return -EALREADY;
6974 : }
6975 : /* If we're still starting the discovery service and ->rc is non-zero, we're
6976 : * going to stop it as soon as we can
6977 : */
6978 0 : if (ctx->initializing && ctx->rc != 0) {
6979 0 : return -EALREADY;
6980 : }
6981 0 : stop_discovery(ctx, cb_fn, cb_ctx);
6982 0 : return 0;
6983 : }
6984 : }
6985 :
6986 0 : return -ENOENT;
6987 : }
6988 :
6989 : static int
6990 1 : bdev_nvme_library_init(void)
6991 : {
6992 1 : g_bdev_nvme_init_thread = spdk_get_thread();
6993 :
6994 1 : spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
6995 : bdev_nvme_destroy_poll_group_cb,
6996 : sizeof(struct nvme_poll_group), "nvme_poll_groups");
6997 :
6998 1 : return 0;
6999 : }
7000 :
7001 : static void
7002 1 : bdev_nvme_fini_destruct_ctrlrs(void)
7003 : {
7004 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
7005 : struct nvme_ctrlr *nvme_ctrlr;
7006 :
7007 1 : pthread_mutex_lock(&g_bdev_nvme_mutex);
7008 1 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
7009 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
7010 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
7011 0 : if (nvme_ctrlr->destruct) {
7012 : /* This controller's destruction was already started
7013 : * before the application started shutting down
7014 : */
7015 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7016 0 : continue;
7017 : }
7018 0 : nvme_ctrlr->destruct = true;
7019 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7020 :
7021 0 : spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
7022 : nvme_ctrlr);
7023 : }
7024 : }
7025 :
7026 1 : g_bdev_nvme_module_finish = true;
7027 1 : if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
7028 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7029 1 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
7030 1 : spdk_bdev_module_fini_done();
7031 1 : return;
7032 : }
7033 :
7034 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7035 : }
7036 :
7037 : static void
7038 0 : check_discovery_fini(void *arg)
7039 : {
7040 0 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7041 0 : bdev_nvme_fini_destruct_ctrlrs();
7042 : }
7043 0 : }
7044 :
7045 : static void
7046 1 : bdev_nvme_library_fini(void)
7047 : {
7048 : struct nvme_probe_skip_entry *entry, *entry_tmp;
7049 : struct discovery_ctx *ctx;
7050 :
7051 1 : spdk_poller_unregister(&g_hotplug_poller);
7052 1 : free(g_hotplug_probe_ctx);
7053 1 : g_hotplug_probe_ctx = NULL;
7054 :
7055 1 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
7056 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
7057 0 : free(entry);
7058 : }
7059 :
7060 1 : assert(spdk_get_thread() == g_bdev_nvme_init_thread);
7061 1 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7062 1 : bdev_nvme_fini_destruct_ctrlrs();
7063 : } else {
7064 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7065 0 : stop_discovery(ctx, check_discovery_fini, NULL);
7066 : }
7067 : }
7068 1 : }
7069 :
7070 : static void
7071 0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
7072 : {
7073 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7074 0 : struct spdk_bdev *bdev = bdev_io->bdev;
7075 0 : struct spdk_dif_ctx dif_ctx;
7076 0 : struct spdk_dif_error err_blk = {};
7077 : int rc;
7078 0 : struct spdk_dif_ctx_init_ext_opts dif_opts;
7079 :
7080 0 : dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
7081 0 : dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
7082 0 : rc = spdk_dif_ctx_init(&dif_ctx,
7083 0 : bdev->blocklen, bdev->md_len, bdev->md_interleave,
7084 0 : bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
7085 0 : bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
7086 0 : if (rc != 0) {
7087 0 : SPDK_ERRLOG("Initialization of DIF context failed\n");
7088 0 : return;
7089 : }
7090 :
7091 0 : if (bdev->md_interleave) {
7092 0 : rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7093 0 : bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7094 : } else {
7095 0 : struct iovec md_iov = {
7096 0 : .iov_base = bdev_io->u.bdev.md_buf,
7097 0 : .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
7098 : };
7099 :
7100 0 : rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7101 0 : &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7102 : }
7103 :
7104 0 : if (rc != 0) {
7105 0 : SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
7106 : err_blk.err_type, err_blk.err_offset);
7107 : } else {
7108 0 : SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
7109 : }
7110 : }
7111 :
7112 : static void
7113 0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7114 : {
7115 0 : struct nvme_bdev_io *bio = ref;
7116 :
7117 0 : if (spdk_nvme_cpl_is_success(cpl)) {
7118 : /* Run PI verification for read data buffer. */
7119 0 : bdev_nvme_verify_pi_error(bio);
7120 : }
7121 :
7122 : /* Return original completion status */
7123 0 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7124 0 : }
7125 :
7126 : static void
7127 3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7128 : {
7129 3 : struct nvme_bdev_io *bio = ref;
7130 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7131 : int ret;
7132 :
7133 3 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7134 0 : SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
7135 : cpl->status.sct, cpl->status.sc);
7136 :
7137 : /* Save completion status to use after verifying PI error. */
7138 0 : bio->cpl = *cpl;
7139 :
7140 0 : if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
7141 : /* Read without PI checking to verify PI error. */
7142 0 : ret = bdev_nvme_no_pi_readv(bio,
7143 : bdev_io->u.bdev.iovs,
7144 : bdev_io->u.bdev.iovcnt,
7145 : bdev_io->u.bdev.md_buf,
7146 : bdev_io->u.bdev.num_blocks,
7147 : bdev_io->u.bdev.offset_blocks);
7148 0 : if (ret == 0) {
7149 0 : return;
7150 : }
7151 : }
7152 : }
7153 :
7154 3 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7155 : }
7156 :
7157 : static void
7158 25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7159 : {
7160 25 : struct nvme_bdev_io *bio = ref;
7161 :
7162 25 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7163 0 : SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
7164 : cpl->status.sct, cpl->status.sc);
7165 : /* Run PI verification for write data buffer if PI error is detected. */
7166 0 : bdev_nvme_verify_pi_error(bio);
7167 : }
7168 :
7169 25 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7170 25 : }
7171 :
7172 : static void
7173 0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7174 : {
7175 0 : struct nvme_bdev_io *bio = ref;
7176 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7177 :
7178 : /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
7179 : * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
7180 : */
7181 0 : bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
7182 :
7183 0 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7184 0 : SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
7185 : cpl->status.sct, cpl->status.sc);
7186 : /* Run PI verification for zone append data buffer if PI error is detected. */
7187 0 : bdev_nvme_verify_pi_error(bio);
7188 : }
7189 :
7190 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7191 0 : }
7192 :
7193 : static void
7194 1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7195 : {
7196 1 : struct nvme_bdev_io *bio = ref;
7197 :
7198 1 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7199 0 : SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
7200 : cpl->status.sct, cpl->status.sc);
7201 : /* Run PI verification for compare data buffer if PI error is detected. */
7202 0 : bdev_nvme_verify_pi_error(bio);
7203 : }
7204 :
7205 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7206 1 : }
7207 :
7208 : static void
7209 4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7210 : {
7211 4 : struct nvme_bdev_io *bio = ref;
7212 :
7213 : /* Compare operation completion */
7214 4 : if (!bio->first_fused_completed) {
7215 : /* Save compare result for write callback */
7216 2 : bio->cpl = *cpl;
7217 2 : bio->first_fused_completed = true;
7218 2 : return;
7219 : }
7220 :
7221 : /* Write operation completion */
7222 2 : if (spdk_nvme_cpl_is_error(&bio->cpl)) {
7223 : /* If bio->cpl is already an error, it means the compare operation failed. In that case,
7224 : * complete the IO with the compare operation's status.
7225 : */
7226 1 : if (!spdk_nvme_cpl_is_error(cpl)) {
7227 1 : SPDK_ERRLOG("Unexpected write success after compare failure.\n");
7228 : }
7229 :
7230 1 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7231 : } else {
7232 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7233 : }
7234 : }
7235 :
7236 : static void
7237 1 : bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
7238 : {
7239 1 : struct nvme_bdev_io *bio = ref;
7240 :
7241 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7242 1 : }
7243 :
7244 : static int
7245 0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
7246 : {
7247 0 : switch (desc->zt) {
7248 0 : case SPDK_NVME_ZONE_TYPE_SEQWR:
7249 0 : info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
7250 0 : break;
7251 0 : default:
7252 0 : SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
7253 0 : return -EIO;
7254 : }
7255 :
7256 0 : switch (desc->zs) {
7257 0 : case SPDK_NVME_ZONE_STATE_EMPTY:
7258 0 : info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
7259 0 : break;
7260 0 : case SPDK_NVME_ZONE_STATE_IOPEN:
7261 0 : info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
7262 0 : break;
7263 0 : case SPDK_NVME_ZONE_STATE_EOPEN:
7264 0 : info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
7265 0 : break;
7266 0 : case SPDK_NVME_ZONE_STATE_CLOSED:
7267 0 : info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
7268 0 : break;
7269 0 : case SPDK_NVME_ZONE_STATE_RONLY:
7270 0 : info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
7271 0 : break;
7272 0 : case SPDK_NVME_ZONE_STATE_FULL:
7273 0 : info->state = SPDK_BDEV_ZONE_STATE_FULL;
7274 0 : break;
7275 0 : case SPDK_NVME_ZONE_STATE_OFFLINE:
7276 0 : info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
7277 0 : break;
7278 0 : default:
7279 0 : SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
7280 0 : return -EIO;
7281 : }
7282 :
7283 0 : info->zone_id = desc->zslba;
7284 0 : info->write_pointer = desc->wp;
7285 0 : info->capacity = desc->zcap;
7286 :
7287 0 : return 0;
7288 : }
7289 :
7290 : static void
7291 0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
7292 : {
7293 0 : struct nvme_bdev_io *bio = ref;
7294 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7295 0 : uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
7296 0 : uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
7297 0 : struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
7298 : uint64_t max_zones_per_buf, i;
7299 : uint32_t zone_report_bufsize;
7300 : struct spdk_nvme_ns *ns;
7301 : struct spdk_nvme_qpair *qpair;
7302 : int ret;
7303 :
7304 0 : if (spdk_nvme_cpl_is_error(cpl)) {
7305 0 : goto out_complete_io_nvme_cpl;
7306 : }
7307 :
7308 0 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
7309 0 : ret = -ENXIO;
7310 0 : goto out_complete_io_ret;
7311 : }
7312 :
7313 0 : ns = bio->io_path->nvme_ns->ns;
7314 0 : qpair = bio->io_path->qpair->qpair;
7315 :
7316 0 : zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
7317 0 : max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
7318 : sizeof(bio->zone_report_buf->descs[0]);
7319 :
7320 0 : if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
7321 0 : ret = -EINVAL;
7322 0 : goto out_complete_io_ret;
7323 : }
7324 :
7325 0 : if (!bio->zone_report_buf->nr_zones) {
7326 0 : ret = -EINVAL;
7327 0 : goto out_complete_io_ret;
7328 : }
7329 :
7330 0 : for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
7331 0 : ret = fill_zone_from_report(&info[bio->handled_zones],
7332 0 : &bio->zone_report_buf->descs[i]);
7333 0 : if (ret) {
7334 0 : goto out_complete_io_ret;
7335 : }
7336 0 : bio->handled_zones++;
7337 : }
7338 :
7339 0 : if (bio->handled_zones < zones_to_copy) {
7340 0 : uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
7341 0 : uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
7342 :
7343 0 : memset(bio->zone_report_buf, 0, zone_report_bufsize);
7344 0 : ret = spdk_nvme_zns_report_zones(ns, qpair,
7345 0 : bio->zone_report_buf, zone_report_bufsize,
7346 : slba, SPDK_NVME_ZRA_LIST_ALL, true,
7347 : bdev_nvme_get_zone_info_done, bio);
7348 0 : if (!ret) {
7349 0 : return;
7350 : } else {
7351 0 : goto out_complete_io_ret;
7352 : }
7353 : }
7354 :
7355 0 : out_complete_io_nvme_cpl:
7356 0 : free(bio->zone_report_buf);
7357 0 : bio->zone_report_buf = NULL;
7358 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7359 0 : return;
7360 :
7361 0 : out_complete_io_ret:
7362 0 : free(bio->zone_report_buf);
7363 0 : bio->zone_report_buf = NULL;
7364 0 : bdev_nvme_io_complete(bio, ret);
7365 : }
7366 :
7367 : static void
7368 0 : bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
7369 : {
7370 0 : struct nvme_bdev_io *bio = ref;
7371 :
7372 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7373 0 : }
7374 :
7375 : static void
7376 4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
7377 : {
7378 4 : struct nvme_bdev_io *bio = ctx;
7379 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7380 4 : const struct spdk_nvme_cpl *cpl = &bio->cpl;
7381 :
7382 4 : assert(bdev_nvme_io_type_is_admin(bdev_io->type));
7383 :
7384 4 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
7385 4 : }
7386 :
7387 : static void
7388 3 : bdev_nvme_abort_complete(void *ctx)
7389 : {
7390 3 : struct nvme_bdev_io *bio = ctx;
7391 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7392 :
7393 3 : if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
7394 3 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
7395 : } else {
7396 0 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
7397 : }
7398 3 : }
7399 :
7400 : static void
7401 3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
7402 : {
7403 3 : struct nvme_bdev_io *bio = ref;
7404 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7405 :
7406 3 : bio->cpl = *cpl;
7407 3 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
7408 3 : }
7409 :
7410 : static void
7411 4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
7412 : {
7413 4 : struct nvme_bdev_io *bio = ref;
7414 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7415 :
7416 4 : bio->cpl = *cpl;
7417 4 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
7418 : bdev_nvme_admin_passthru_complete_nvme_status, bio);
7419 4 : }
7420 :
7421 : static void
7422 0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
7423 : {
7424 0 : struct nvme_bdev_io *bio = ref;
7425 : struct iovec *iov;
7426 :
7427 0 : bio->iov_offset = sgl_offset;
7428 0 : for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
7429 0 : iov = &bio->iovs[bio->iovpos];
7430 0 : if (bio->iov_offset < iov->iov_len) {
7431 0 : break;
7432 : }
7433 :
7434 0 : bio->iov_offset -= iov->iov_len;
7435 : }
7436 0 : }
7437 :
7438 : static int
7439 0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
7440 : {
7441 0 : struct nvme_bdev_io *bio = ref;
7442 : struct iovec *iov;
7443 :
7444 0 : assert(bio->iovpos < bio->iovcnt);
7445 :
7446 0 : iov = &bio->iovs[bio->iovpos];
7447 :
7448 0 : *address = iov->iov_base;
7449 0 : *length = iov->iov_len;
7450 :
7451 0 : if (bio->iov_offset) {
7452 0 : assert(bio->iov_offset <= iov->iov_len);
7453 0 : *address += bio->iov_offset;
7454 0 : *length -= bio->iov_offset;
7455 : }
7456 :
7457 0 : bio->iov_offset += *length;
7458 0 : if (bio->iov_offset == iov->iov_len) {
7459 0 : bio->iovpos++;
7460 0 : bio->iov_offset = 0;
7461 : }
7462 :
7463 0 : return 0;
7464 : }
7465 :
7466 : static void
7467 0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
7468 : {
7469 0 : struct nvme_bdev_io *bio = ref;
7470 : struct iovec *iov;
7471 :
7472 0 : bio->fused_iov_offset = sgl_offset;
7473 0 : for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
7474 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
7475 0 : if (bio->fused_iov_offset < iov->iov_len) {
7476 0 : break;
7477 : }
7478 :
7479 0 : bio->fused_iov_offset -= iov->iov_len;
7480 : }
7481 0 : }
7482 :
7483 : static int
7484 0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
7485 : {
7486 0 : struct nvme_bdev_io *bio = ref;
7487 : struct iovec *iov;
7488 :
7489 0 : assert(bio->fused_iovpos < bio->fused_iovcnt);
7490 :
7491 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
7492 :
7493 0 : *address = iov->iov_base;
7494 0 : *length = iov->iov_len;
7495 :
7496 0 : if (bio->fused_iov_offset) {
7497 0 : assert(bio->fused_iov_offset <= iov->iov_len);
7498 0 : *address += bio->fused_iov_offset;
7499 0 : *length -= bio->fused_iov_offset;
7500 : }
7501 :
7502 0 : bio->fused_iov_offset += *length;
7503 0 : if (bio->fused_iov_offset == iov->iov_len) {
7504 0 : bio->fused_iovpos++;
7505 0 : bio->fused_iov_offset = 0;
7506 : }
7507 :
7508 0 : return 0;
7509 : }
7510 :
7511 : static int
7512 0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7513 : void *md, uint64_t lba_count, uint64_t lba)
7514 : {
7515 : int rc;
7516 :
7517 0 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
7518 : lba_count, lba);
7519 :
7520 0 : bio->iovs = iov;
7521 0 : bio->iovcnt = iovcnt;
7522 0 : bio->iovpos = 0;
7523 0 : bio->iov_offset = 0;
7524 :
7525 0 : rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
7526 0 : bio->io_path->qpair->qpair,
7527 : lba, lba_count,
7528 : bdev_nvme_no_pi_readv_done, bio, 0,
7529 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7530 : md, 0, 0);
7531 :
7532 0 : if (rc != 0 && rc != -ENOMEM) {
7533 0 : SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
7534 : }
7535 0 : return rc;
7536 : }
7537 :
7538 : static int
7539 3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7540 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
7541 : struct spdk_memory_domain *domain, void *domain_ctx,
7542 : struct spdk_accel_sequence *seq)
7543 : {
7544 3 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7545 3 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7546 : int rc;
7547 :
7548 3 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7549 : lba_count, lba);
7550 :
7551 3 : bio->iovs = iov;
7552 3 : bio->iovcnt = iovcnt;
7553 3 : bio->iovpos = 0;
7554 3 : bio->iov_offset = 0;
7555 :
7556 3 : if (domain != NULL || seq != NULL) {
7557 1 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
7558 1 : bio->ext_opts.memory_domain = domain;
7559 1 : bio->ext_opts.memory_domain_ctx = domain_ctx;
7560 1 : bio->ext_opts.io_flags = flags;
7561 1 : bio->ext_opts.metadata = md;
7562 1 : bio->ext_opts.accel_sequence = seq;
7563 :
7564 1 : rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
7565 : bdev_nvme_readv_done, bio,
7566 : bdev_nvme_queued_reset_sgl,
7567 : bdev_nvme_queued_next_sge,
7568 : &bio->ext_opts);
7569 2 : } else if (iovcnt == 1) {
7570 2 : rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
7571 : md, lba, lba_count, bdev_nvme_readv_done,
7572 : bio, flags, 0, 0);
7573 : } else {
7574 0 : rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
7575 : bdev_nvme_readv_done, bio, flags,
7576 : bdev_nvme_queued_reset_sgl,
7577 : bdev_nvme_queued_next_sge, md, 0, 0);
7578 : }
7579 :
7580 3 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
7581 0 : SPDK_ERRLOG("readv failed: rc = %d\n", rc);
7582 : }
7583 3 : return rc;
7584 : }
7585 :
7586 : static int
7587 25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7588 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
7589 : struct spdk_memory_domain *domain, void *domain_ctx,
7590 : struct spdk_accel_sequence *seq)
7591 : {
7592 25 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7593 25 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7594 : int rc;
7595 :
7596 25 : SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7597 : lba_count, lba);
7598 :
7599 25 : bio->iovs = iov;
7600 25 : bio->iovcnt = iovcnt;
7601 25 : bio->iovpos = 0;
7602 25 : bio->iov_offset = 0;
7603 :
7604 25 : if (domain != NULL || seq != NULL) {
7605 0 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
7606 0 : bio->ext_opts.memory_domain = domain;
7607 0 : bio->ext_opts.memory_domain_ctx = domain_ctx;
7608 0 : bio->ext_opts.io_flags = flags;
7609 0 : bio->ext_opts.metadata = md;
7610 0 : bio->ext_opts.accel_sequence = seq;
7611 :
7612 0 : rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
7613 : bdev_nvme_writev_done, bio,
7614 : bdev_nvme_queued_reset_sgl,
7615 : bdev_nvme_queued_next_sge,
7616 : &bio->ext_opts);
7617 25 : } else if (iovcnt == 1) {
7618 25 : rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
7619 : md, lba, lba_count, bdev_nvme_writev_done,
7620 : bio, flags, 0, 0);
7621 : } else {
7622 0 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
7623 : bdev_nvme_writev_done, bio, flags,
7624 : bdev_nvme_queued_reset_sgl,
7625 : bdev_nvme_queued_next_sge, md, 0, 0);
7626 : }
7627 :
7628 25 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
7629 0 : SPDK_ERRLOG("writev failed: rc = %d\n", rc);
7630 : }
7631 25 : return rc;
7632 : }
7633 :
7634 : static int
7635 0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7636 : void *md, uint64_t lba_count, uint64_t zslba,
7637 : uint32_t flags)
7638 : {
7639 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7640 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7641 : int rc;
7642 :
7643 0 : SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
7644 : lba_count, zslba);
7645 :
7646 0 : bio->iovs = iov;
7647 0 : bio->iovcnt = iovcnt;
7648 0 : bio->iovpos = 0;
7649 0 : bio->iov_offset = 0;
7650 :
7651 0 : if (iovcnt == 1) {
7652 0 : rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
7653 : lba_count,
7654 : bdev_nvme_zone_appendv_done, bio,
7655 : flags,
7656 : 0, 0);
7657 : } else {
7658 0 : rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
7659 : bdev_nvme_zone_appendv_done, bio, flags,
7660 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7661 : md, 0, 0);
7662 : }
7663 :
7664 0 : if (rc != 0 && rc != -ENOMEM) {
7665 0 : SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
7666 : }
7667 0 : return rc;
7668 : }
7669 :
7670 : static int
7671 1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7672 : void *md, uint64_t lba_count, uint64_t lba,
7673 : uint32_t flags)
7674 : {
7675 : int rc;
7676 :
7677 1 : SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7678 : lba_count, lba);
7679 :
7680 1 : bio->iovs = iov;
7681 1 : bio->iovcnt = iovcnt;
7682 1 : bio->iovpos = 0;
7683 1 : bio->iov_offset = 0;
7684 :
7685 1 : rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
7686 1 : bio->io_path->qpair->qpair,
7687 : lba, lba_count,
7688 : bdev_nvme_comparev_done, bio, flags,
7689 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7690 : md, 0, 0);
7691 :
7692 1 : if (rc != 0 && rc != -ENOMEM) {
7693 0 : SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
7694 : }
7695 1 : return rc;
7696 : }
7697 :
7698 : static int
7699 2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
7700 : struct iovec *write_iov, int write_iovcnt,
7701 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
7702 : {
7703 2 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7704 2 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7705 2 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7706 : int rc;
7707 :
7708 2 : SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7709 : lba_count, lba);
7710 :
7711 2 : bio->iovs = cmp_iov;
7712 2 : bio->iovcnt = cmp_iovcnt;
7713 2 : bio->iovpos = 0;
7714 2 : bio->iov_offset = 0;
7715 2 : bio->fused_iovs = write_iov;
7716 2 : bio->fused_iovcnt = write_iovcnt;
7717 2 : bio->fused_iovpos = 0;
7718 2 : bio->fused_iov_offset = 0;
7719 :
7720 2 : if (bdev_io->num_retries == 0) {
7721 2 : bio->first_fused_submitted = false;
7722 2 : bio->first_fused_completed = false;
7723 : }
7724 :
7725 2 : if (!bio->first_fused_submitted) {
7726 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
7727 2 : memset(&bio->cpl, 0, sizeof(bio->cpl));
7728 :
7729 2 : rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
7730 : bdev_nvme_comparev_and_writev_done, bio, flags,
7731 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
7732 2 : if (rc == 0) {
7733 2 : bio->first_fused_submitted = true;
7734 2 : flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
7735 : } else {
7736 0 : if (rc != -ENOMEM) {
7737 0 : SPDK_ERRLOG("compare failed: rc = %d\n", rc);
7738 : }
7739 0 : return rc;
7740 : }
7741 : }
7742 :
7743 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
7744 :
7745 2 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
7746 : bdev_nvme_comparev_and_writev_done, bio, flags,
7747 : bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
7748 2 : if (rc != 0 && rc != -ENOMEM) {
7749 0 : SPDK_ERRLOG("write failed: rc = %d\n", rc);
7750 0 : rc = 0;
7751 : }
7752 :
7753 2 : return rc;
7754 : }
7755 :
7756 : static int
7757 1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
7758 : {
7759 1 : struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
7760 : struct spdk_nvme_dsm_range *range;
7761 : uint64_t offset, remaining;
7762 : uint64_t num_ranges_u64;
7763 : uint16_t num_ranges;
7764 : int rc;
7765 :
7766 1 : num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
7767 : SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
7768 1 : if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
7769 0 : SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
7770 0 : return -EINVAL;
7771 : }
7772 1 : num_ranges = (uint16_t)num_ranges_u64;
7773 :
7774 1 : offset = offset_blocks;
7775 1 : remaining = num_blocks;
7776 1 : range = &dsm_ranges[0];
7777 :
7778 : /* Fill max-size ranges until the remaining blocks fit into one range */
7779 1 : while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
7780 0 : range->attributes.raw = 0;
7781 0 : range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
7782 0 : range->starting_lba = offset;
7783 :
7784 0 : offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
7785 0 : remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
7786 0 : range++;
7787 : }
7788 :
7789 : /* Final range describes the remaining blocks */
7790 1 : range->attributes.raw = 0;
7791 1 : range->length = remaining;
7792 1 : range->starting_lba = offset;
7793 :
7794 1 : rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
7795 1 : bio->io_path->qpair->qpair,
7796 : SPDK_NVME_DSM_ATTR_DEALLOCATE,
7797 : dsm_ranges, num_ranges,
7798 : bdev_nvme_queued_done, bio);
7799 :
7800 1 : return rc;
7801 : }
7802 :
7803 : static int
7804 0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
7805 : {
7806 0 : if (num_blocks > UINT16_MAX + 1) {
7807 0 : SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
7808 0 : return -EINVAL;
7809 : }
7810 :
7811 0 : return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
7812 0 : bio->io_path->qpair->qpair,
7813 : offset_blocks, num_blocks,
7814 : bdev_nvme_queued_done, bio,
7815 : 0);
7816 : }
7817 :
7818 : static int
7819 0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
7820 : struct spdk_bdev_zone_info *info)
7821 : {
7822 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7823 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7824 0 : uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
7825 0 : uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
7826 0 : uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
7827 :
7828 0 : if (zone_id % zone_size != 0) {
7829 0 : return -EINVAL;
7830 : }
7831 :
7832 0 : if (num_zones > total_zones || !num_zones) {
7833 0 : return -EINVAL;
7834 : }
7835 :
7836 0 : assert(!bio->zone_report_buf);
7837 0 : bio->zone_report_buf = calloc(1, zone_report_bufsize);
7838 0 : if (!bio->zone_report_buf) {
7839 0 : return -ENOMEM;
7840 : }
7841 :
7842 0 : bio->handled_zones = 0;
7843 :
7844 0 : return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
7845 : zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
7846 : bdev_nvme_get_zone_info_done, bio);
7847 : }
7848 :
7849 : static int
7850 0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
7851 : enum spdk_bdev_zone_action action)
7852 : {
7853 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7854 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7855 :
7856 0 : switch (action) {
7857 0 : case SPDK_BDEV_ZONE_CLOSE:
7858 0 : return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
7859 : bdev_nvme_zone_management_done, bio);
7860 0 : case SPDK_BDEV_ZONE_FINISH:
7861 0 : return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
7862 : bdev_nvme_zone_management_done, bio);
7863 0 : case SPDK_BDEV_ZONE_OPEN:
7864 0 : return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
7865 : bdev_nvme_zone_management_done, bio);
7866 0 : case SPDK_BDEV_ZONE_RESET:
7867 0 : return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
7868 : bdev_nvme_zone_management_done, bio);
7869 0 : case SPDK_BDEV_ZONE_OFFLINE:
7870 0 : return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
7871 : bdev_nvme_zone_management_done, bio);
7872 0 : default:
7873 0 : return -EINVAL;
7874 : }
7875 : }
7876 :
7877 : static void
7878 5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
7879 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
7880 : {
7881 : struct nvme_io_path *io_path;
7882 : struct nvme_ctrlr *nvme_ctrlr;
7883 : uint32_t max_xfer_size;
7884 5 : int rc = -ENXIO;
7885 :
7886 : /* Choose the first ctrlr which is not failed. */
7887 8 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
7888 7 : nvme_ctrlr = io_path->qpair->ctrlr;
7889 :
7890 : /* We should skip any unavailable nvme_ctrlr rather than checking
7891 : * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
7892 : */
7893 7 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
7894 3 : continue;
7895 : }
7896 :
7897 4 : max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
7898 :
7899 4 : if (nbytes > max_xfer_size) {
7900 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
7901 0 : rc = -EINVAL;
7902 0 : goto err;
7903 : }
7904 :
7905 4 : rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
7906 : bdev_nvme_admin_passthru_done, bio);
7907 4 : if (rc == 0) {
7908 4 : return;
7909 : }
7910 : }
7911 :
7912 1 : err:
7913 1 : bdev_nvme_admin_complete(bio, rc);
7914 : }
7915 :
7916 : static int
7917 0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
7918 : void *buf, size_t nbytes)
7919 : {
7920 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7921 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7922 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
7923 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
7924 :
7925 0 : if (nbytes > max_xfer_size) {
7926 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
7927 0 : return -EINVAL;
7928 : }
7929 :
7930 : /*
7931 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
7932 : * so fill it out automatically.
7933 : */
7934 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
7935 :
7936 0 : return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
7937 : (uint32_t)nbytes, bdev_nvme_queued_done, bio);
7938 : }
7939 :
7940 : static int
7941 0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
7942 : void *buf, size_t nbytes, void *md_buf, size_t md_len)
7943 : {
7944 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7945 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7946 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
7947 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
7948 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
7949 :
7950 0 : if (nbytes > max_xfer_size) {
7951 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
7952 0 : return -EINVAL;
7953 : }
7954 :
7955 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
7956 0 : SPDK_ERRLOG("invalid meta data buffer size\n");
7957 0 : return -EINVAL;
7958 : }
7959 :
7960 : /*
7961 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
7962 : * so fill it out automatically.
7963 : */
7964 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
7965 :
7966 0 : return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
7967 : (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
7968 : }
7969 :
7970 : static int
7971 0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
7972 : struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
7973 : size_t nbytes, void *md_buf, size_t md_len)
7974 : {
7975 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7976 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7977 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
7978 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
7979 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
7980 :
7981 0 : bio->iovs = iov;
7982 0 : bio->iovcnt = iovcnt;
7983 0 : bio->iovpos = 0;
7984 0 : bio->iov_offset = 0;
7985 :
7986 0 : if (nbytes > max_xfer_size) {
7987 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
7988 0 : return -EINVAL;
7989 : }
7990 :
7991 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
7992 0 : SPDK_ERRLOG("invalid meta data buffer size\n");
7993 0 : return -EINVAL;
7994 : }
7995 :
7996 : /*
7997 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
7998 : * require a nsid, so fill it out automatically.
7999 : */
8000 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8001 :
8002 0 : return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
8003 : ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
8004 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
8005 : }
8006 :
8007 : static void
8008 6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
8009 : struct nvme_bdev_io *bio_to_abort)
8010 : {
8011 : struct nvme_io_path *io_path;
8012 6 : int rc = 0;
8013 :
8014 6 : rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
8015 6 : if (rc == 0) {
8016 1 : bdev_nvme_admin_complete(bio, 0);
8017 1 : return;
8018 : }
8019 :
8020 5 : io_path = bio_to_abort->io_path;
8021 5 : if (io_path != NULL) {
8022 3 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8023 3 : io_path->qpair->qpair,
8024 : bio_to_abort,
8025 : bdev_nvme_abort_done, bio);
8026 : } else {
8027 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
8028 2 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8029 : NULL,
8030 : bio_to_abort,
8031 : bdev_nvme_abort_done, bio);
8032 :
8033 2 : if (rc != -ENOENT) {
8034 1 : break;
8035 : }
8036 : }
8037 : }
8038 :
8039 5 : if (rc != 0) {
8040 : /* If no command was found or there was any error, complete the abort
8041 : * request with failure.
8042 : */
8043 2 : bdev_nvme_admin_complete(bio, rc);
8044 : }
8045 : }
8046 :
8047 : static int
8048 0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
8049 : uint64_t num_blocks)
8050 : {
8051 0 : struct spdk_nvme_scc_source_range range = {
8052 : .slba = src_offset_blocks,
8053 0 : .nlb = num_blocks - 1
8054 : };
8055 :
8056 0 : return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
8057 0 : bio->io_path->qpair->qpair,
8058 : &range, 1, dst_offset_blocks,
8059 : bdev_nvme_queued_done, bio);
8060 : }
8061 :
8062 : static void
8063 0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
8064 : {
8065 : const char *action;
8066 :
8067 0 : if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
8068 0 : action = "reset";
8069 0 : } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
8070 0 : action = "abort";
8071 : } else {
8072 0 : action = "none";
8073 : }
8074 :
8075 0 : spdk_json_write_object_begin(w);
8076 :
8077 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
8078 :
8079 0 : spdk_json_write_named_object_begin(w, "params");
8080 0 : spdk_json_write_named_string(w, "action_on_timeout", action);
8081 0 : spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
8082 0 : spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
8083 0 : spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
8084 0 : spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
8085 0 : spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
8086 0 : spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
8087 0 : spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
8088 0 : spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
8089 0 : spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
8090 0 : spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
8091 0 : spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
8092 0 : spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
8093 0 : spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
8094 0 : spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
8095 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
8096 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
8097 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
8098 0 : spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
8099 0 : spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
8100 0 : spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
8101 0 : spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
8102 0 : spdk_json_write_object_end(w);
8103 :
8104 0 : spdk_json_write_object_end(w);
8105 0 : }
8106 :
8107 : static void
8108 0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
8109 : {
8110 0 : struct spdk_nvme_transport_id trid;
8111 :
8112 0 : spdk_json_write_object_begin(w);
8113 :
8114 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
8115 :
8116 0 : spdk_json_write_named_object_begin(w, "params");
8117 0 : spdk_json_write_named_string(w, "name", ctx->name);
8118 0 : spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
8119 :
8120 0 : trid = ctx->trid;
8121 0 : memset(trid.subnqn, 0, sizeof(trid.subnqn));
8122 0 : nvme_bdev_dump_trid_json(&trid, w);
8123 :
8124 0 : spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
8125 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
8126 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
8127 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8128 : ctx->bdev_opts.fast_io_fail_timeout_sec);
8129 0 : spdk_json_write_object_end(w);
8130 :
8131 0 : spdk_json_write_object_end(w);
8132 0 : }
8133 :
8134 : #ifdef SPDK_CONFIG_NVME_CUSE
8135 : static void
8136 0 : nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
8137 : struct nvme_ctrlr *nvme_ctrlr)
8138 0 : {
8139 0 : size_t cuse_name_size = 128;
8140 0 : char cuse_name[cuse_name_size];
8141 :
8142 0 : if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
8143 : cuse_name, &cuse_name_size) != 0) {
8144 0 : return;
8145 : }
8146 :
8147 0 : spdk_json_write_object_begin(w);
8148 :
8149 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");
8150 :
8151 0 : spdk_json_write_named_object_begin(w, "params");
8152 0 : spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
8153 0 : spdk_json_write_object_end(w);
8154 :
8155 0 : spdk_json_write_object_end(w);
8156 : }
8157 : #endif
8158 :
8159 : static void
8160 0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
8161 : struct nvme_ctrlr *nvme_ctrlr)
8162 : {
8163 : struct spdk_nvme_transport_id *trid;
8164 : const struct spdk_nvme_ctrlr_opts *opts;
8165 :
8166 0 : if (nvme_ctrlr->opts.from_discovery_service) {
8167 : /* Do not emit an RPC for this - it will be implicitly
8168 : * covered by a separate bdev_nvme_start_discovery or
8169 : * bdev_nvme_start_mdns_discovery RPC.
8170 : */
8171 0 : return;
8172 : }
8173 :
8174 0 : trid = &nvme_ctrlr->active_path_id->trid;
8175 :
8176 0 : spdk_json_write_object_begin(w);
8177 :
8178 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
8179 :
8180 0 : spdk_json_write_named_object_begin(w, "params");
8181 0 : spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
8182 0 : nvme_bdev_dump_trid_json(trid, w);
8183 0 : spdk_json_write_named_bool(w, "prchk_reftag",
8184 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
8185 0 : spdk_json_write_named_bool(w, "prchk_guard",
8186 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
8187 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
8188 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
8189 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8190 : nvme_ctrlr->opts.fast_io_fail_timeout_sec);
8191 0 : if (nvme_ctrlr->opts.psk_path[0] != '\0') {
8192 0 : spdk_json_write_named_string(w, "psk", nvme_ctrlr->opts.psk_path);
8193 : }
8194 :
8195 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
8196 0 : spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
8197 0 : spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
8198 0 : spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
8199 :
8200 0 : spdk_json_write_object_end(w);
8201 :
8202 0 : spdk_json_write_object_end(w);
8203 : }
8204 :
8205 : static void
8206 0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
8207 : {
8208 0 : spdk_json_write_object_begin(w);
8209 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
8210 :
8211 0 : spdk_json_write_named_object_begin(w, "params");
8212 0 : spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
8213 0 : spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
8214 0 : spdk_json_write_object_end(w);
8215 :
8216 0 : spdk_json_write_object_end(w);
8217 0 : }
8218 :
8219 : static int
8220 0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
8221 : {
8222 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
8223 : struct nvme_ctrlr *nvme_ctrlr;
8224 : struct discovery_ctx *ctx;
8225 :
8226 0 : bdev_nvme_opts_config_json(w);
8227 :
8228 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
8229 :
8230 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
8231 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
8232 0 : nvme_ctrlr_config_json(w, nvme_ctrlr);
8233 :
8234 : #ifdef SPDK_CONFIG_NVME_CUSE
8235 0 : nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
8236 : #endif
8237 : }
8238 : }
8239 :
8240 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
8241 0 : if (!ctx->from_mdns_discovery_service) {
8242 0 : bdev_nvme_discovery_config_json(w, ctx);
8243 : }
8244 : }
8245 :
8246 0 : bdev_nvme_mdns_discovery_config_json(w);
8247 :
8248 : /* Dump as last parameter to give all NVMe bdevs chance to be constructed
8249 : * before enabling hotplug poller.
8250 : */
8251 0 : bdev_nvme_hotplug_config_json(w);
8252 :
8253 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
8254 0 : return 0;
8255 : }
8256 :
8257 : struct spdk_nvme_ctrlr *
8258 1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
8259 : {
8260 : struct nvme_bdev *nbdev;
8261 : struct nvme_ns *nvme_ns;
8262 :
8263 1 : if (!bdev || bdev->module != &nvme_if) {
8264 0 : return NULL;
8265 : }
8266 :
8267 1 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
8268 1 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
8269 1 : assert(nvme_ns != NULL);
8270 :
8271 1 : return nvme_ns->ctrlr->ctrlr;
8272 : }
8273 :
8274 : void
8275 0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
8276 : {
8277 0 : struct nvme_ns *nvme_ns = io_path->nvme_ns;
8278 0 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
8279 : const struct spdk_nvme_ctrlr_data *cdata;
8280 : const struct spdk_nvme_transport_id *trid;
8281 : const char *adrfam_str;
8282 :
8283 0 : spdk_json_write_object_begin(w);
8284 :
8285 0 : spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
8286 :
8287 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
8288 0 : trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
8289 :
8290 0 : spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
8291 0 : spdk_json_write_named_bool(w, "current", io_path->nbdev_ch != NULL &&
8292 0 : io_path == io_path->nbdev_ch->current_io_path);
8293 0 : spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
8294 0 : spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
8295 :
8296 0 : spdk_json_write_named_object_begin(w, "transport");
8297 0 : spdk_json_write_named_string(w, "trtype", trid->trstring);
8298 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
8299 0 : if (trid->trsvcid[0] != '\0') {
8300 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
8301 : }
8302 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
8303 0 : if (adrfam_str) {
8304 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
8305 : }
8306 0 : spdk_json_write_object_end(w);
8307 :
8308 0 : spdk_json_write_object_end(w);
8309 0 : }
8310 :
8311 : void
8312 0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
8313 : {
8314 : struct discovery_ctx *ctx;
8315 : struct discovery_entry_ctx *entry_ctx;
8316 :
8317 0 : spdk_json_write_array_begin(w);
8318 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
8319 0 : spdk_json_write_object_begin(w);
8320 0 : spdk_json_write_named_string(w, "name", ctx->name);
8321 :
8322 0 : spdk_json_write_named_object_begin(w, "trid");
8323 0 : nvme_bdev_dump_trid_json(&ctx->trid, w);
8324 0 : spdk_json_write_object_end(w);
8325 :
8326 0 : spdk_json_write_named_array_begin(w, "referrals");
8327 0 : TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
8328 0 : spdk_json_write_object_begin(w);
8329 0 : spdk_json_write_named_object_begin(w, "trid");
8330 0 : nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
8331 0 : spdk_json_write_object_end(w);
8332 0 : spdk_json_write_object_end(w);
8333 : }
8334 0 : spdk_json_write_array_end(w);
8335 :
8336 0 : spdk_json_write_object_end(w);
8337 : }
8338 0 : spdk_json_write_array_end(w);
8339 0 : }
8340 :
8341 1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
8342 :
8343 1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)
8344 : {
8345 0 : struct spdk_trace_tpoint_opts opts[] = {
8346 : {
8347 : "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
8348 : OWNER_NONE, OBJECT_BDEV_NVME_IO, 1,
8349 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
8350 : },
8351 : {
8352 : "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
8353 : OWNER_NONE, OBJECT_BDEV_NVME_IO, 0,
8354 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
8355 : }
8356 : };
8357 :
8358 :
8359 0 : spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
8360 0 : spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
8361 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
8362 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
8363 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
8364 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
8365 0 : }
|