Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation. All rights reserved.
3 : * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4 : * Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : * Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
6 : */
7 :
8 : #include "spdk/stdinc.h"
9 :
10 : #include "bdev_nvme.h"
11 :
12 : #include "spdk/accel.h"
13 : #include "spdk/config.h"
14 : #include "spdk/endian.h"
15 : #include "spdk/bdev.h"
16 : #include "spdk/json.h"
17 : #include "spdk/keyring.h"
18 : #include "spdk/likely.h"
19 : #include "spdk/nvme.h"
20 : #include "spdk/nvme_ocssd.h"
21 : #include "spdk/nvme_zns.h"
22 : #include "spdk/opal.h"
23 : #include "spdk/thread.h"
24 : #include "spdk/trace.h"
25 : #include "spdk/string.h"
26 : #include "spdk/util.h"
27 : #include "spdk/uuid.h"
28 :
29 : #include "spdk/bdev_module.h"
30 : #include "spdk/log.h"
31 :
32 : #include "spdk_internal/usdt.h"
33 : #include "spdk_internal/trace_defs.h"
34 :
/* Helpers for log lines that identify an nvme_ctrlr with a [string, id] prefix.
 * CTRLR_STRING picks the subnqn of the active path for fabrics transports and its
 * traddr otherwise; CTRLR_ID is the value returned by spdk_nvme_ctrlr_get_id().
 * The macro argument is expanded several times and is not parenthesized, so pass a
 * simple pointer expression with no side effects.
 */
35 : #define CTRLR_STRING(nvme_ctrlr) \
36 : (spdk_nvme_trtype_is_fabrics(nvme_ctrlr->active_path_id->trid.trtype) ? \
37 : nvme_ctrlr->active_path_id->trid.subnqn : nvme_ctrlr->active_path_id->trid.traddr)
38 :
39 : #define CTRLR_ID(nvme_ctrlr) (spdk_nvme_ctrlr_get_id(nvme_ctrlr->ctrlr))
40 :
/* NOTE(review): every logging macro below expands with a trailing semicolon. A call
 * written with its own semicolon therefore yields an extra empty statement, which
 * breaks an unbraced if/else wrapped around the call. Keep call sites braced.
 */
41 : #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \
42 : SPDK_ERRLOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
43 :
44 : #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \
45 : SPDK_WARNLOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
46 :
47 : #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \
48 : SPDK_NOTICELOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
49 :
50 : #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \
51 : SPDK_INFOLOG(bdev_nvme, "[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
52 :
/* Without DEBUG the debug variant expands to an empty do/while(0) statement and its
 * arguments are dropped entirely, so they are never evaluated.
 */
53 : #ifdef DEBUG
54 : #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \
55 : SPDK_DEBUGLOG(bdev_nvme, "[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
56 : #else
57 : #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0)
58 : #endif
59 :
/* Logging helpers that prefix each message with the bdev name (nbdev->disk.name).
 * NOTE(review): as with the controller variants, each expansion already ends with a
 * semicolon, so an unbraced if/else around a call will not compile as intended.
 * The nbdev argument is not parenthesized in BDEV_STRING; pass a simple expression.
 */
60 : #define BDEV_STRING(nbdev) (nbdev->disk.name)
61 :
62 : #define NVME_BDEV_ERRLOG(nbdev, format, ...) \
63 : SPDK_ERRLOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
64 :
65 : #define NVME_BDEV_WARNLOG(nbdev, format, ...) \
66 : SPDK_WARNLOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
67 :
68 : #define NVME_BDEV_NOTICELOG(nbdev, format, ...) \
69 : SPDK_NOTICELOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
70 :
71 : #define NVME_BDEV_INFOLOG(nbdev, format, ...) \
72 : SPDK_INFOLOG(bdev_nvme, "[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
73 :
/* Default values consumed by the g_opts initializer below (delay_cmd_submit and
 * keep_alive_timeout_ms).
 */
74 : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
75 : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
76 :
/* Size limits; their users are outside this part of the file.
 * NOTE(review): presumably buffer lengths for a namespace-id string and a controller
 * name respectively - confirm at the use sites.
 */
77 : #define NSID_STR_LEN 10
78 :
79 : #define SPDK_CONTROLLER_NAME_MAX 512
80 :
/* Forward declaration: referenced by the nvme_if module descriptor as .config_json. */
81 : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
82 :
/* Per-I/O context of this module. bdev_nvme_get_ctx_size() reports sizeof this
 * struct to the bdev layer, and __bdev_nvme_io_complete() traces bdev_io->driver_ctx,
 * which is presumably where an instance lives - confirm against the submit path.
 */
83 : struct nvme_bdev_io {
84 : /** array of iovecs to transfer. */
85 : struct iovec *iovs;
86 :
87 : /** Number of iovecs in iovs array. */
88 : int iovcnt;
89 :
90 : /** Current iovec position. */
91 : int iovpos;
92 :
93 : /** Offset in current iovec. */
94 : uint32_t iov_offset;
95 :
96 : /** Offset in the current iovec of the fused_iovs array (presumably; mirrors iov_offset - confirm). */
97 : uint32_t fused_iov_offset;
98 :
99 : /** Second array of iovecs, used together with a fused command (NOTE(review): which half of compare-and-write uses it is not visible here). */
100 : struct iovec *fused_iovs;
101 :
102 : /** Number of iovecs in fused_iovs array (presumably; mirrors iovcnt - confirm). */
103 : int fused_iovcnt;
104 :
105 : /** Current iovec position in fused_iovs (presumably; mirrors iovpos - confirm). */
106 : int fused_iovpos;
107 :
108 : /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
109 : * being reset in a reset I/O.
110 : */
111 : struct nvme_io_path *io_path;
112 :
113 : /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
114 : struct spdk_nvme_cpl cpl;
115 :
116 : /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
117 : struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
118 :
119 : /** Keeps track if first of fused commands was submitted */
120 : bool first_fused_submitted;
121 :
122 : /** Keeps track if first of fused commands was completed */
123 : bool first_fused_completed;
124 :
125 : /* How many times the current I/O was retried. */
126 : int32_t retry_count;
127 :
128 : /** Expiration value in ticks to retry the current I/O. */
129 : uint64_t retry_ticks;
130 :
131 : /** Temporary pointer to zone report buffer */
132 : struct spdk_nvme_zns_zone_report *zone_report_buf;
133 :
134 : /** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
135 : uint64_t handled_zones;
136 :
137 : /* Current tsc at submit time. */
138 : uint64_t submit_tsc;
139 :
140 : /* Linkage for the retry_io_list of an nvme_bdev_channel (see bdev_nvme_clear_retry_io_path). */
141 : TAILQ_ENTRY(nvme_bdev_io) retry_link;
142 : };
143 :
/* One transport ID that must be ignored; entries are chained through tailq on the
 * file-local g_skipped_nvme_ctrlrs list defined right below.
 */
144 : struct nvme_probe_skip_entry {
145 : struct spdk_nvme_transport_id trid;
146 : TAILQ_ENTRY(nvme_probe_skip_entry) tailq;
147 : };
148 : /* All the controllers deleted by users via RPC are skipped by hotplug monitor */
149 : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
150 : g_skipped_nvme_ctrlrs);
151 :
/* Bitmasks built with SPDK_BIT() from the SPDK_NVMF_DHCHAP_HASH_* and
 * SPDK_NVMF_DHCHAP_DHGROUP_* identifiers. They are the initial values of
 * g_opts.dhchap_digests and g_opts.dhchap_dhgroups.
 */
152 : #define BDEV_NVME_DEFAULT_DIGESTS (SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256) | \
153 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA384) | \
154 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512))
155 :
156 : #define BDEV_NVME_DEFAULT_DHGROUPS (SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL) | \
157 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_2048) | \
158 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_3072) | \
159 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_4096) | \
160 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_6144) | \
161 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192))
162 :
/* Module-wide option set with its compiled-in defaults. Within this part of the file
 * it is read by nvme_io_path_alloc() (io_path_stat); the code that lets users change
 * it is not visible here.
 */
163 : static struct spdk_bdev_nvme_opts g_opts = {
164 : .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
165 : .timeout_us = 0,
166 : .timeout_admin_us = 0,
167 : .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
168 : .transport_retry_count = 4,
169 : .arbitration_burst = 0,
170 : .low_priority_weight = 0,
171 : .medium_priority_weight = 0,
172 : .high_priority_weight = 0,
173 : .nvme_adminq_poll_period_us = 10000ULL,
174 : .nvme_ioq_poll_period_us = 0,
175 : .io_queue_requests = 0,
176 : .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
177 : .bdev_retry_count = 3,
178 : .transport_ack_timeout = 0,
179 : .ctrlr_loss_timeout_sec = 0,
180 : .reconnect_delay_sec = 0,
181 : .fast_io_fail_timeout_sec = 0,
182 : .disable_auto_failback = false,
183 : .generate_uuids = false,
184 : .transport_tos = 0,
185 : .nvme_error_stat = false,
186 : .io_path_stat = false,
187 : .allow_accel_sequence = false,
188 : .dhchap_digests = BDEV_NVME_DEFAULT_DIGESTS,
189 : .dhchap_dhgroups = BDEV_NVME_DEFAULT_DHGROUPS,
190 : };
191 :
/* Hotplug monitor configuration and state. The poll period defaults to
 * NVME_HOTPLUG_POLL_PERIOD_DEFAULT and hotplug starts disabled.
 * NOTE(review): the _us suffix of g_nvme_hotplug_poll_period_us suggests both period
 * constants are microseconds - confirm where the poller is registered.
 * g_bdev_nvme_init_thread is the only non-static variable in this group.
 */
192 : #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
193 : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL
194 :
195 : static int g_hot_insert_nvme_controller_index = 0;
196 : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
197 : static bool g_nvme_hotplug_enabled = false;
198 : struct spdk_thread *g_bdev_nvme_init_thread;
199 : static struct spdk_poller *g_hotplug_poller;
200 : static struct spdk_poller *g_hotplug_probe_poller;
201 : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
202 :
/* Forward declarations of file-local functions that are used before their
 * definitions. Of these, bdev_nvme_library_init and bdev_nvme_library_fini are
 * referenced by the nvme_if module descriptor, and nvme_ns_free is called from
 * _nvme_ctrlr_delete(); the remaining definitions lie further down the file.
 */
203 : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
204 : struct nvme_async_probe_ctx *ctx);
205 : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
206 : struct nvme_async_probe_ctx *ctx);
207 : static int bdev_nvme_library_init(void);
208 : static void bdev_nvme_library_fini(void);
209 : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
210 : struct spdk_bdev_io *bdev_io);
211 : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
212 : struct spdk_bdev_io *bdev_io);
213 : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
214 : void *md, uint64_t lba_count, uint64_t lba,
215 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
216 : struct spdk_accel_sequence *seq);
217 : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
218 : void *md, uint64_t lba_count, uint64_t lba);
219 : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
220 : void *md, uint64_t lba_count, uint64_t lba,
221 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
222 : struct spdk_accel_sequence *seq,
223 : union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13);
224 : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
225 : void *md, uint64_t lba_count,
226 : uint64_t zslba, uint32_t flags);
227 : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
228 : void *md, uint64_t lba_count, uint64_t lba,
229 : uint32_t flags);
230 : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
231 : struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
232 : int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
233 : uint32_t flags);
234 : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
235 : uint32_t num_zones, struct spdk_bdev_zone_info *info);
236 : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
237 : enum spdk_bdev_zone_action action);
238 : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
239 : struct nvme_bdev_io *bio,
240 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
241 : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
242 : void *buf, size_t nbytes);
243 : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
244 : void *buf, size_t nbytes, void *md_buf, size_t md_len);
245 : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
246 : struct iovec *iov, int iovcnt, size_t nbytes,
247 : void *md_buf, size_t md_len);
248 : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
249 : struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
250 : static void bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio);
251 : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
252 : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
253 : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
254 : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
255 :
256 : static struct nvme_ns *nvme_ns_alloc(void);
257 : static void nvme_ns_free(struct nvme_ns *ns);
258 :
/* Three-way comparison of two namespaces by their id field.
 * Returns -1 if ns1->id is smaller, 1 if it is larger and 0 if both are equal
 * (the second branch yields the 0/1 result of the > comparison).
 * Used as the ordering function of the nvme_ns_tree red-black tree generated below,
 * which links nodes through the node member of struct nvme_ns.
 */
259 : static int
260 176 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
261 : {
262 176 : return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
263 : }
264 :
265 1109 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
266 :
/* Return the spdk_nvme_qpair behind a controller I/O channel.
 *
 * ctrlr_io_ch must be non-NULL (asserted) and its context is interpreted as a
 * struct nvme_ctrlr_channel. ctrlr_ch->qpair is dereferenced without a NULL check,
 * so the channel must have its nvme_qpair set.
 */
267 : struct spdk_nvme_qpair *
268 1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
269 : {
270 : struct nvme_ctrlr_channel *ctrlr_ch;
271 :
272 1 : assert(ctrlr_io_ch != NULL);
273 :
274 1 : ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
275 :
276 1 : return ctrlr_ch->qpair->qpair;
277 : }
278 :
/* Module callback (.get_ctx_size of nvme_if): size in bytes of the per-I/O context,
 * i.e. sizeof(struct nvme_bdev_io).
 */
279 : static int
280 0 : bdev_nvme_get_ctx_size(void)
281 : {
282 0 : return sizeof(struct nvme_bdev_io);
283 : }
284 :
/* Descriptor of the nvme bdev module, registered through SPDK_BDEV_MODULE_REGISTER.
 * async_fini is set: _nvme_ctrlr_delete() is the place in this file that calls
 * spdk_bdev_module_fini_done(), once g_bdev_nvme_module_finish is set and the
 * global controller list is empty.
 */
285 : static struct spdk_bdev_module nvme_if = {
286 : .name = "nvme",
287 : .async_fini = true,
288 : .module_init = bdev_nvme_library_init,
289 : .module_fini = bdev_nvme_library_fini,
290 : .config_json = bdev_nvme_config_json,
291 : .get_ctx_size = bdev_nvme_get_ctx_size,
292 :
293 : };
294 1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
295 :
/* Global list of nvme_bdev_ctrlr objects, the mutex that the list walks and removals
 * in this file take around it, and the flag checked by _nvme_ctrlr_delete() to
 * decide whether module teardown can be completed.
 */
296 : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
297 : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
298 : bool g_bdev_nvme_module_finish;
299 :
/* Find the nvme_bdev_ctrlr whose name equals name (exact strcmp match).
 *
 * Returns the matching entry, or NULL when the walk reaches the end of
 * g_nvme_bdev_ctrlrs without a match.
 * This function takes no lock itself; nvme_ctrlr_get_by_name() calls it with
 * g_bdev_nvme_mutex held. name is passed straight to strcmp and must not be NULL.
 */
300 : struct nvme_bdev_ctrlr *
301 356 : nvme_bdev_ctrlr_get_by_name(const char *name)
302 : {
303 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
304 :
305 368 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
306 198 : if (strcmp(name, nbdev_ctrlr->name) == 0) {
307 186 : break;
308 : }
309 12 : }
310 :
311 356 : return nbdev_ctrlr;
312 : }
313 :
/* Find the nvme_ctrlr under nbdev_ctrlr that matches both trid and hostnqn.
 *
 * A controller matches when spdk_nvme_transport_id_compare() reports equality with
 * the trid of its active path (only active_path_id is examined here) and the
 * hostnqn in its controller options equals hostnqn.
 * Returns the first match or NULL. No lock is taken here; nvme_ctrlr_get() calls it
 * with g_bdev_nvme_mutex held.
 */
314 : static struct nvme_ctrlr *
315 75 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
316 : const struct spdk_nvme_transport_id *trid, const char *hostnqn)
317 : {
318 : const struct spdk_nvme_ctrlr_opts *opts;
319 : struct nvme_ctrlr *nvme_ctrlr;
320 :
321 133 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
322 102 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
323 102 : if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0 &&
324 44 : strcmp(hostnqn, opts->hostnqn) == 0) {
325 44 : break;
326 : }
327 58 : }
328 :
329 75 : return nvme_ctrlr;
330 : }
331 :
/* Find the nvme_ctrlr under nbdev_ctrlr whose controller data reports the given
 * cntlid. Returns the first match, or NULL if no controller in the list matches.
 * No lock is taken in this function.
 */
332 : struct nvme_ctrlr *
333 0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
334 : uint16_t cntlid)
335 : {
336 : struct nvme_ctrlr *nvme_ctrlr;
337 : const struct spdk_nvme_ctrlr_data *cdata;
338 :
339 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
340 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
341 0 : if (cdata->cntlid == cntlid) {
342 0 : break;
343 : }
344 0 : }
345 :
346 0 : return nvme_ctrlr;
347 : }
348 :
/* Find the nvme_bdev with namespace id nsid in nbdev_ctrlr->bdevs.
 *
 * The list walk is done under g_bdev_nvme_mutex, so the caller must not already hold
 * that mutex. The returned pointer (or NULL) is used by the caller after the mutex
 * has been released.
 */
349 : static struct nvme_bdev *
350 75 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
351 : {
352 : struct nvme_bdev *bdev;
353 :
354 75 : pthread_mutex_lock(&g_bdev_nvme_mutex);
355 109 : TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
356 69 : if (bdev->nsid == nsid) {
357 35 : break;
358 : }
359 34 : }
360 75 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
361 :
362 75 : return bdev;
363 : }
364 :
/* Look up the namespace with id nsid in the red-black tree of nvme_ctrlr.
 *
 * nsid must be greater than zero (asserted). Returns the tree node or NULL.
 */
365 : struct nvme_ns *
366 145 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
367 : {
/* Stack-allocated search key: only id is set because nvme_ns_cmp reads nothing else. */
368 : struct nvme_ns ns;
369 :
370 145 : assert(nsid > 0);
371 :
372 145 : ns.id = nsid;
373 145 : return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
374 : }
375 :
/* Return the minimum node of the namespace tree of nvme_ctrlr, i.e. the namespace
 * with the smallest id under nvme_ns_cmp ordering, or NULL if the tree is empty.
 */
376 : struct nvme_ns *
377 175 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
378 : {
379 175 : return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
380 : }
381 :
/* Return the in-order successor of ns in the namespace tree of nvme_ctrlr.
 * Returns NULL when ns is NULL or when ns is the last node.
 */
382 : struct nvme_ns *
383 74 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
384 : {
385 74 : if (ns == NULL) {
386 0 : return NULL;
387 : }
388 :
389 74 : return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
390 74 : }
391 :
/* Search every nvme_bdev_ctrlr in the global list for an nvme_ctrlr that matches
 * trid and hostnqn (see nvme_bdev_ctrlr_get_ctrlr for the match rule).
 *
 * The search runs under g_bdev_nvme_mutex; the first match is returned after the
 * mutex is released, or NULL if nothing matches.
 */
392 : static struct nvme_ctrlr *
393 58 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
394 : {
395 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
396 58 : struct nvme_ctrlr *nvme_ctrlr = NULL;
397 :
398 58 : pthread_mutex_lock(&g_bdev_nvme_mutex);
399 83 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
400 25 : nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid, hostnqn);
401 25 : if (nvme_ctrlr != NULL) {
402 0 : break;
403 : }
404 25 : }
405 58 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
406 :
407 58 : return nvme_ctrlr;
408 : }
409 :
/* Return the first nvme_ctrlr of the nvme_bdev_ctrlr called name.
 *
 * Returns NULL when name is NULL or when no nvme_bdev_ctrlr has that name. If the
 * named nvme_bdev_ctrlr holds several controllers, only the head of its ctrlrs list
 * is returned. The lookup is performed under g_bdev_nvme_mutex.
 */
410 : struct nvme_ctrlr *
411 131 : nvme_ctrlr_get_by_name(const char *name)
412 : {
413 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
414 131 : struct nvme_ctrlr *nvme_ctrlr = NULL;
415 :
416 131 : if (name == NULL) {
417 0 : return NULL;
418 : }
419 :
420 131 : pthread_mutex_lock(&g_bdev_nvme_mutex);
421 131 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
422 131 : if (nbdev_ctrlr != NULL) {
423 63 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
424 63 : }
425 131 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
426 :
427 131 : return nvme_ctrlr;
428 131 : }
429 :
/* Invoke fn(nbdev_ctrlr, ctx) for every entry of the global nvme_bdev_ctrlr list.
 *
 * fn runs while g_bdev_nvme_mutex is held. The mutex is statically initialized with
 * PTHREAD_MUTEX_INITIALIZER, so fn must not call anything that locks it again.
 * NOTE(review): fn must also not remove the current entry, since plain
 * TAILQ_FOREACH reads the next pointer from it after fn returns.
 */
430 : void
431 0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
432 : {
433 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
434 :
435 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
436 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
437 0 : fn(nbdev_ctrlr, ctx);
438 0 : }
439 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
440 0 : }
441 :
/* Invoke fn(ctrlr, ctx) for the underlying spdk_nvme_ctrlr of every nvme_ctrlr of
 * every nvme_bdev_ctrlr in the global list.
 *
 * fn runs while g_bdev_nvme_mutex is held, so it must not call back into functions
 * that take that mutex.
 */
442 : void
443 5 : spdk_bdev_nvme_get_each_spdk_nvme_ctrlr(spdk_bdev_nvme_get_each_spdk_nvme_ctrlr_fn fn, void *ctx)
444 : {
445 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
446 : struct nvme_ctrlr *nvme_ctrlr;
447 :
448 5 : pthread_mutex_lock(&g_bdev_nvme_mutex);
449 11 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
450 17 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
451 11 : fn(nvme_ctrlr->ctrlr, ctx);
452 11 : }
453 6 : }
454 5 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
455 5 : }
456 :
/* Bookkeeping for one nvme_ctrlr_for_each_channel() run.
 *   fn  - per-channel callback supplied by the caller
 *   cpl - completion callback supplied by the caller
 *   i   - current SPDK channel iterator, refreshed before each fn/cpl call
 *   ctx - opaque caller context handed to fn and cpl
 */
457 : struct nvme_ctrlr_channel_iter {
458 : nvme_ctrlr_for_each_channel_msg fn;
459 : nvme_ctrlr_for_each_channel_done cpl;
460 : struct spdk_io_channel_iter *i;
461 : void *ctx;
462 : };
463 :
/* Called by a per-channel callback when it is done with the current channel:
 * forwards status to spdk_for_each_channel_continue() on the iterator stored in iter.
 */
464 : void
465 166 : nvme_ctrlr_for_each_channel_continue(struct nvme_ctrlr_channel_iter *iter, int status)
466 : {
467 166 : spdk_for_each_channel_continue(iter->i, status);
468 166 : }
469 :
/* Per-channel trampoline passed to spdk_for_each_channel(): unpacks the iterator
 * context, the nvme_ctrlr io_device and the nvme_ctrlr_channel context of the
 * current channel, records the SPDK iterator in iter->i and calls the user callback.
 * The user callback is expected to call nvme_ctrlr_for_each_channel_continue().
 */
470 : static void
471 166 : nvme_ctrlr_each_channel_msg(struct spdk_io_channel_iter *i)
472 : {
473 166 : struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
474 166 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
475 166 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
476 166 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
477 :
478 166 : iter->i = i;
479 166 : iter->fn(iter, nvme_ctrlr, ctrlr_ch, iter->ctx);
480 166 : }
481 :
/* Completion trampoline passed to spdk_for_each_channel(): calls the user completion
 * callback with the nvme_ctrlr, the user context and status, then frees the iterator.
 * NOTE(review): iter->cpl is invoked unconditionally, so it must not be NULL.
 */
482 : static void
483 97 : nvme_ctrlr_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
484 : {
485 97 : struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
486 97 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
487 :
488 97 : iter->i = i;
489 97 : iter->cpl(nvme_ctrlr, iter->ctx, status);
490 :
491 97 : free(iter);
492 97 : }
493 :
/* Run fn on every I/O channel of nvme_ctrlr and call cpl once at the end.
 *
 * nvme_ctrlr and fn must be non-NULL (asserted). A heap-allocated iterator carries
 * fn, cpl and ctx through spdk_for_each_channel(); it is freed by the completion
 * trampoline.
 * NOTE(review): if the iterator cannot be allocated, the error is logged and the
 * function returns (after assert(false) in debug builds) WITHOUT calling cpl, so the
 * caller never receives a completion in that case.
 */
494 : void
495 97 : nvme_ctrlr_for_each_channel(struct nvme_ctrlr *nvme_ctrlr,
496 : nvme_ctrlr_for_each_channel_msg fn, void *ctx,
497 : nvme_ctrlr_for_each_channel_done cpl)
498 : {
499 : struct nvme_ctrlr_channel_iter *iter;
500 :
501 97 : assert(nvme_ctrlr != NULL && fn != NULL);
502 :
503 97 : iter = calloc(1, sizeof(struct nvme_ctrlr_channel_iter));
504 97 : if (iter == NULL) {
505 0 : SPDK_ERRLOG("Unable to allocate iterator\n");
506 0 : assert(false);
507 : return;
508 : }
509 :
510 97 : iter->fn = fn;
511 97 : iter->cpl = cpl;
512 97 : iter->ctx = ctx;
513 :
514 194 : spdk_for_each_channel(nvme_ctrlr, nvme_ctrlr_each_channel_msg,
515 97 : iter, nvme_ctrlr_each_channel_cpl);
516 97 : }
517 :
/* Bookkeeping for one nvme_bdev_for_each_channel() run.
 *   fn  - per-channel callback supplied by the caller
 *   cpl - completion callback supplied by the caller
 *   i   - current SPDK channel iterator, refreshed before each fn/cpl call
 *   ctx - opaque caller context handed to fn and cpl
 */
518 : struct nvme_bdev_channel_iter {
519 : nvme_bdev_for_each_channel_msg fn;
520 : nvme_bdev_for_each_channel_done cpl;
521 : struct spdk_io_channel_iter *i;
522 : void *ctx;
523 : };
524 :
/* Called by a per-channel callback when it is done with the current channel:
 * forwards status to spdk_for_each_channel_continue() on the iterator stored in iter.
 */
525 : void
526 69 : nvme_bdev_for_each_channel_continue(struct nvme_bdev_channel_iter *iter, int status)
527 : {
528 69 : spdk_for_each_channel_continue(iter->i, status);
529 69 : }
530 :
/* Per-channel trampoline passed to spdk_for_each_channel(): unpacks the iterator
 * context, the nvme_bdev io_device and the nvme_bdev_channel context of the current
 * channel, records the SPDK iterator in iter->i and calls the user callback.
 * The user callback is expected to call nvme_bdev_for_each_channel_continue().
 */
531 : static void
532 69 : nvme_bdev_each_channel_msg(struct spdk_io_channel_iter *i)
533 : {
534 69 : struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
535 69 : struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
536 69 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
537 69 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
538 :
539 69 : iter->i = i;
540 69 : iter->fn(iter, nbdev, nbdev_ch, iter->ctx);
541 69 : }
542 :
/* Completion trampoline passed to spdk_for_each_channel(): calls the user completion
 * callback with the nvme_bdev, the user context and status, then frees the iterator.
 * NOTE(review): iter->cpl is invoked unconditionally, so it must not be NULL.
 */
543 : static void
544 60 : nvme_bdev_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
545 : {
546 60 : struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
547 60 : struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
548 :
549 60 : iter->i = i;
550 60 : iter->cpl(nbdev, iter->ctx, status);
551 :
552 60 : free(iter);
553 60 : }
554 :
/* Run fn on every I/O channel of nbdev and call cpl once at the end.
 *
 * nbdev and fn must be non-NULL (asserted). A heap-allocated iterator carries fn,
 * cpl and ctx through spdk_for_each_channel(); it is freed by the completion
 * trampoline.
 * NOTE(review): if the iterator cannot be allocated, the error is logged and the
 * function returns (after assert(false) in debug builds) WITHOUT calling cpl, so the
 * caller never receives a completion in that case.
 */
555 : void
556 60 : nvme_bdev_for_each_channel(struct nvme_bdev *nbdev,
557 : nvme_bdev_for_each_channel_msg fn, void *ctx,
558 : nvme_bdev_for_each_channel_done cpl)
559 : {
560 : struct nvme_bdev_channel_iter *iter;
561 :
562 60 : assert(nbdev != NULL && fn != NULL);
563 :
564 60 : iter = calloc(1, sizeof(struct nvme_bdev_channel_iter));
565 60 : if (iter == NULL) {
566 0 : SPDK_ERRLOG("Unable to allocate iterator\n");
567 0 : assert(false);
568 : return;
569 : }
570 :
571 60 : iter->fn = fn;
572 60 : iter->cpl = cpl;
573 60 : iter->ctx = ctx;
574 :
575 60 : spdk_for_each_channel(nbdev, nvme_bdev_each_channel_msg, iter,
576 : nvme_bdev_each_channel_cpl);
577 60 : }
578 :
/* Write the fields of a transport ID as named JSON strings into w.
 *
 * trtype and adrfam are emitted only when their string conversion returns non-NULL;
 * traddr, trsvcid and subnqn are emitted only when they are non-empty.
 * The caller is responsible for having an object open in w; this function writes
 * the members only.
 */
579 : void
580 0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
581 : {
582 : const char *trtype_str;
583 : const char *adrfam_str;
584 :
585 0 : trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
586 0 : if (trtype_str) {
587 0 : spdk_json_write_named_string(w, "trtype", trtype_str);
588 0 : }
589 :
590 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
591 0 : if (adrfam_str) {
592 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
593 0 : }
594 :
595 0 : if (trid->traddr[0] != '\0') {
596 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
597 0 : }
598 :
599 0 : if (trid->trsvcid[0] != '\0') {
600 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
601 0 : }
602 :
603 0 : if (trid->subnqn[0] != '\0') {
604 0 : spdk_json_write_named_string(w, "subnqn", trid->subnqn);
605 0 : }
606 0 : }
607 :
/* Detach nvme_ctrlr from nbdev_ctrlr and destroy nbdev_ctrlr if it became empty.
 *
 * Under g_bdev_nvme_mutex the controller is removed from nbdev_ctrlr->ctrlrs. If
 * other controllers remain, nothing else happens. Otherwise nbdev_ctrlr is unlinked
 * from the global list and, after the mutex is released, its name and the object
 * itself are freed. At that point its bdevs list is asserted to be empty.
 */
608 : static void
609 66 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
610 : struct nvme_ctrlr *nvme_ctrlr)
611 : {
612 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
613 66 : pthread_mutex_lock(&g_bdev_nvme_mutex);
614 :
615 66 : TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
616 66 : if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
617 18 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
618 :
619 18 : return;
620 : }
621 48 : TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
622 :
623 48 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
624 :
625 48 : assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));
626 :
627 48 : free(nbdev_ctrlr->name);
628 48 : free(nbdev_ctrlr);
629 66 : }
630 :
/* Release everything owned by nvme_ctrlr and free it.
 *
 * Frees the ANA buffers, destructs the opal device if present, unlinks the
 * controller from its nvme_bdev_ctrlr (if any), frees all remaining namespaces and
 * path ids, destroys the controller mutex and drops the three keyring references.
 * Finally, if the module is finishing and no nvme_bdev_ctrlr is left, the global
 * io_device is unregistered and spdk_bdev_module_fini_done() is called.
 */
631 : static void
632 67 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
633 : {
634 : struct nvme_path_id *path_id, *tmp_path;
635 : struct nvme_ns *ns, *tmp_ns;
636 :
/* The two buffers are released with different deallocators: free() vs spdk_free(). */
637 67 : free(nvme_ctrlr->copied_ana_desc);
638 67 : spdk_free(nvme_ctrlr->ana_log_page);
639 :
640 67 : if (nvme_ctrlr->opal_dev) {
641 0 : spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
642 0 : nvme_ctrlr->opal_dev = NULL;
643 0 : }
644 :
645 67 : if (nvme_ctrlr->nbdev_ctrlr) {
646 66 : nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
647 66 : }
648 :
649 67 : RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
650 0 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
651 0 : nvme_ns_free(ns);
652 0 : }
653 :
654 134 : TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
655 67 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
656 67 : free(path_id);
657 67 : }
658 :
659 67 : pthread_mutex_destroy(&nvme_ctrlr->mutex);
660 67 : spdk_keyring_put_key(nvme_ctrlr->psk);
661 67 : spdk_keyring_put_key(nvme_ctrlr->dhchap_key);
662 67 : spdk_keyring_put_key(nvme_ctrlr->dhchap_ctrlr_key);
663 67 : free(nvme_ctrlr);
664 :
/* nvme_ctrlr must not be touched past this point; only global state is examined. */
665 67 : pthread_mutex_lock(&g_bdev_nvme_mutex);
666 67 : if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
667 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
668 0 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
669 0 : spdk_bdev_module_fini_done();
670 0 : return;
671 : }
672 67 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
673 67 : }
674 :
/* Poller body that drives the asynchronous detach started by nvme_ctrlr_delete().
 *
 * Each call polls spdk_nvme_detach_poll_async(). Any result other than -EAGAIN ends
 * the polling: the poller is unregistered and the nvme_ctrlr is destroyed.
 * Always returns SPDK_POLLER_BUSY.
 */
675 : static int
676 67 : nvme_detach_poller(void *arg)
677 : {
678 67 : struct nvme_ctrlr *nvme_ctrlr = arg;
679 : int rc;
680 :
681 67 : rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
682 67 : if (rc != -EAGAIN) {
/* Unregister first: _nvme_ctrlr_delete() frees nvme_ctrlr, which holds the poller pointer. */
683 67 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
684 67 : _nvme_ctrlr_delete(nvme_ctrlr);
685 67 : }
686 :
687 67 : return SPDK_POLLER_BUSY;
688 : }
689 :
/* Begin tearing down nvme_ctrlr: stop its timers and pollers, then detach the
 * underlying NVMe controller asynchronously.
 *
 * The detach poller is registered before spdk_nvme_detach_async() is called; the
 * poller later finishes the job by calling _nvme_ctrlr_delete(). If either the
 * poller registration or the detach call fails, the controller object is deleted
 * immediately without detaching the NVMe device.
 */
690 : static void
691 67 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
692 : {
693 : int rc;
694 :
695 67 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
696 :
697 67 : if (spdk_interrupt_mode_is_enabled()) {
698 0 : spdk_interrupt_unregister(&nvme_ctrlr->intr);
699 0 : }
700 :
701 : /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
702 67 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
703 :
704 : /* If we got here, the reset/detach poller cannot be active */
705 67 : assert(nvme_ctrlr->reset_detach_poller == NULL);
706 67 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
707 : nvme_ctrlr, 1000);
708 67 : if (nvme_ctrlr->reset_detach_poller == NULL) {
709 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to register detach poller\n");
710 0 : goto error;
711 : }
712 :
713 67 : rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
714 67 : if (rc != 0) {
715 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to detach the NVMe controller\n");
716 0 : goto error;
717 : }
718 :
719 67 : return;
720 : error:
721 : /* We don't have a good way to handle errors here, so just do what we can and delete the
722 : * controller without detaching the underlying NVMe device.
723 : */
724 0 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
725 0 : _nvme_ctrlr_delete(nvme_ctrlr);
726 67 : }
727 :
/* Callback given to spdk_io_device_unregister() by nvme_ctrlr_unregister(): the
 * io_device pointer is the nvme_ctrlr itself, which is now deleted.
 */
728 : static void
729 66 : nvme_ctrlr_unregister_cb(void *io_device)
730 : {
731 66 : struct nvme_ctrlr *nvme_ctrlr = io_device;
732 :
733 66 : nvme_ctrlr_delete(nvme_ctrlr);
734 66 : }
735 :
/* Message handler (sent from nvme_ctrlr_put_ref via spdk_thread_exec_msg): ctx is an
 * nvme_ctrlr, which is unregistered as an io_device; deletion continues in
 * nvme_ctrlr_unregister_cb().
 */
736 : static void
737 66 : nvme_ctrlr_unregister(void *ctx)
738 : {
739 66 : struct nvme_ctrlr *nvme_ctrlr = ctx;
740 :
741 66 : spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
742 66 : }
743 :
/* Tell whether nvme_ctrlr may be unregistered now.
 *
 * Returns true only if destruct is set, ref is not positive, and none of the
 * resetting, ana_log_page_updating and io_path_cache_clearing flags is set.
 * nvme_ctrlr_put_ref() calls this with nvme_ctrlr->mutex held.
 */
744 : static bool
745 254 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
746 : {
747 254 : if (!nvme_ctrlr->destruct) {
748 131 : return false;
749 : }
750 :
751 123 : if (nvme_ctrlr->ref > 0) {
752 57 : return false;
753 : }
754 :
755 66 : if (nvme_ctrlr->resetting) {
756 0 : return false;
757 : }
758 :
759 66 : if (nvme_ctrlr->ana_log_page_updating) {
760 0 : return false;
761 : }
762 :
763 66 : if (nvme_ctrlr->io_path_cache_clearing) {
764 0 : return false;
765 : }
766 :
767 66 : return true;
768 254 : }
769 :
/* Drop one reference on nvme_ctrlr.
 *
 * The counter is decremented under nvme_ctrlr->mutex (it must be positive on entry,
 * asserted). If the controller can then be unregistered, the mutex is released and
 * nvme_ctrlr_unregister() is executed on nvme_ctrlr->thread via
 * spdk_thread_exec_msg().
 */
770 : static void
771 177 : nvme_ctrlr_put_ref(struct nvme_ctrlr *nvme_ctrlr)
772 : {
773 177 : pthread_mutex_lock(&nvme_ctrlr->mutex);
774 : SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
775 :
776 177 : assert(nvme_ctrlr->ref > 0);
777 177 : nvme_ctrlr->ref--;
778 :
779 177 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
780 111 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
781 111 : return;
782 : }
783 :
784 66 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
785 :
786 66 : spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
787 177 : }
788 :
/* Take one reference on nvme_ctrlr: increments ref under nvme_ctrlr->mutex.
 * Each call is balanced by nvme_ctrlr_put_ref().
 */
789 : static void
790 111 : nvme_ctrlr_get_ref(struct nvme_ctrlr *nvme_ctrlr)
791 : {
792 111 : pthread_mutex_lock(&nvme_ctrlr->mutex);
793 111 : nvme_ctrlr->ref++;
794 111 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
795 111 : }
796 :
/* Forget the cached I/O path of the channel: current_io_path becomes NULL and
 * rr_counter is reset to 0. Called in this file whenever an I/O path is added to or
 * removed from the channel.
 * NOTE(review): rr_counter presumably counts I/Os for round-robin selection - confirm
 * in the path selection code.
 */
797 : static void
798 259 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
799 : {
800 259 : nbdev_ch->current_io_path = NULL;
801 259 : nbdev_ch->rr_counter = 0;
802 259 : }
803 :
/* Find the I/O path of nbdev_ch that refers to nvme_ns.
 * Returns the first io_path whose nvme_ns pointer equals nvme_ns, or NULL if the
 * channel has no such path.
 */
804 : static struct nvme_io_path *
805 8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
806 : {
807 : struct nvme_io_path *io_path;
808 :
809 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
810 15 : if (io_path->nvme_ns == nvme_ns) {
811 7 : break;
812 : }
813 8 : }
814 :
815 8 : return io_path;
816 : }
817 :
/* Allocate a zero-initialized nvme_io_path.
 *
 * When g_opts.io_path_stat is enabled, a spdk_bdev_io_stat is also allocated and
 * reset with SPDK_BDEV_RESET_STAT_MAXMIN; otherwise io_path->stat stays NULL.
 * Returns NULL (after logging) if either allocation fails; nothing is leaked on the
 * second failure. Release the result with nvme_io_path_free().
 */
818 : static struct nvme_io_path *
819 39 : nvme_io_path_alloc(void)
820 : {
821 : struct nvme_io_path *io_path;
822 :
823 39 : io_path = calloc(1, sizeof(*io_path));
824 39 : if (io_path == NULL) {
825 0 : SPDK_ERRLOG("Failed to alloc io_path.\n");
826 0 : return NULL;
827 : }
828 :
829 39 : if (g_opts.io_path_stat) {
830 0 : io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
831 0 : if (io_path->stat == NULL) {
832 0 : free(io_path);
833 0 : SPDK_ERRLOG("Failed to alloc io_path stat.\n");
834 0 : return NULL;
835 : }
836 0 : spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
837 0 : }
838 :
839 39 : return io_path;
840 39 : }
841 :
/* Free an nvme_io_path together with its optional statistics block.
 * io_path itself must not be NULL because io_path->stat is read; a NULL stat is fine.
 */
842 : static void
843 39 : nvme_io_path_free(struct nvme_io_path *io_path)
844 : {
845 39 : free(io_path->stat);
846 39 : free(io_path);
847 39 : }
848 :
/* Create an I/O path from nbdev_ch to nvme_ns and link it in.
 *
 * An I/O channel of the namespace controller (nvme_ns->ctrlr) is acquired and held
 * by the new path; it is released in _bdev_nvme_delete_io_path(). The path is
 * appended to the io_path_list of both the nvme_qpair of that channel and nbdev_ch,
 * and the cached current I/O path of nbdev_ch is cleared.
 *
 * Returns 0 on success or -ENOMEM if the path or the I/O channel cannot be obtained.
 * bdev_nvme_create_bdev_channel_cb() calls this with nbdev->mutex held.
 */
849 : static int
850 39 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
851 : {
852 : struct nvme_io_path *io_path;
853 : struct spdk_io_channel *ch;
854 : struct nvme_ctrlr_channel *ctrlr_ch;
855 : struct nvme_qpair *nvme_qpair;
856 :
857 39 : io_path = nvme_io_path_alloc();
858 39 : if (io_path == NULL) {
859 0 : return -ENOMEM;
860 : }
861 :
862 39 : io_path->nvme_ns = nvme_ns;
863 :
864 39 : ch = spdk_get_io_channel(nvme_ns->ctrlr);
865 39 : if (ch == NULL) {
866 0 : nvme_io_path_free(io_path);
867 0 : SPDK_ERRLOG("Failed to alloc io_channel.\n");
868 0 : return -ENOMEM;
869 : }
870 :
871 39 : ctrlr_ch = spdk_io_channel_get_ctx(ch);
872 :
873 39 : nvme_qpair = ctrlr_ch->qpair;
874 39 : assert(nvme_qpair != NULL);
875 :
876 39 : io_path->qpair = nvme_qpair;
877 39 : TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);
878 :
879 39 : io_path->nbdev_ch = nbdev_ch;
880 39 : STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);
881 :
882 39 : bdev_nvme_clear_current_io_path(nbdev_ch);
883 :
884 39 : return 0;
885 39 : }
886 :
/* Clear the io_path pointer of every I/O queued on nbdev_ch->retry_io_list that
 * still refers to io_path, so that no queued retry keeps a pointer to a path that
 * is being removed.
 */
887 : static void
888 39 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
889 : struct nvme_io_path *io_path)
890 : {
891 : struct nvme_bdev_io *bio;
892 :
893 40 : TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
894 1 : if (bio->io_path == io_path) {
895 1 : bio->io_path = NULL;
896 1 : }
897 1 : }
898 39 : }
899 :
/* Unlink io_path from nbdev_ch and release the controller I/O channel it holds.
 *
 * Before unlinking, the path statistics are folded into the namespace statistics
 * (under nbdev->mutex, only when nbdev->ref is non-zero and both stat blocks exist).
 * The cached current path and any retry-queue references to this path are cleared.
 * The io_path object is NOT freed here and stays on the io_path_list of its
 * nvme_qpair; see the comment at the end of the function for the reason.
 */
900 : static void
901 39 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
902 : {
903 : struct spdk_io_channel *ch;
904 : struct nvme_qpair *nvme_qpair;
905 : struct nvme_ctrlr_channel *ctrlr_ch;
906 : struct nvme_bdev *nbdev;
907 :
908 39 : nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));
909 :
910 : /* Add the statistics to nvme_ns before this path is destroyed. */
911 39 : pthread_mutex_lock(&nbdev->mutex);
912 39 : if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
913 0 : spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
914 0 : }
915 39 : pthread_mutex_unlock(&nbdev->mutex);
916 :
917 39 : bdev_nvme_clear_current_io_path(nbdev_ch);
918 39 : bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);
919 :
920 41 : STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
921 39 : io_path->nbdev_ch = NULL;
922 :
923 39 : nvme_qpair = io_path->qpair;
924 39 : assert(nvme_qpair != NULL);
925 :
926 39 : ctrlr_ch = nvme_qpair->ctrlr_ch;
927 39 : assert(ctrlr_ch != NULL);
928 :
/* Drops the channel reference taken by spdk_get_io_channel() in _bdev_nvme_add_io_path(). */
929 39 : ch = spdk_io_channel_from_ctx(ctrlr_ch);
930 39 : spdk_put_io_channel(ch);
931 :
932 : /* After an io_path is removed, I/Os submitted to it may complete and update statistics
933 : * of the io_path. To avoid heap-use-after-free error from this case, do not free the
934 : * io_path here but free the io_path when the associated qpair is freed. It is ensured
935 : * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
936 : */
937 39 : }
938 :
/* Remove every I/O path of nbdev_ch. The _SAFE iteration is required because
 * _bdev_nvme_delete_io_path() unlinks the current element from the list.
 */
939 : static void
940 26 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
941 : {
942 : struct nvme_io_path *io_path, *tmp_io_path;
943 :
944 63 : STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
945 37 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
946 37 : }
947 26 : }
948 :
949 : static int
950 26 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
951 : {
952 26 : struct nvme_bdev_channel *nbdev_ch = ctx_buf;
953 26 : struct nvme_bdev *nbdev = io_device;
954 : struct nvme_ns *nvme_ns;
955 : int rc;
956 :
957 26 : STAILQ_INIT(&nbdev_ch->io_path_list);
958 26 : TAILQ_INIT(&nbdev_ch->retry_io_list);
959 :
960 26 : pthread_mutex_lock(&nbdev->mutex);
961 :
962 26 : nbdev_ch->mp_policy = nbdev->mp_policy;
963 26 : nbdev_ch->mp_selector = nbdev->mp_selector;
964 26 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
965 :
966 63 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
967 37 : rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
968 37 : if (rc != 0) {
969 0 : pthread_mutex_unlock(&nbdev->mutex);
970 :
971 0 : _bdev_nvme_delete_io_paths(nbdev_ch);
972 0 : return rc;
973 : }
974 37 : }
975 26 : pthread_mutex_unlock(&nbdev->mutex);
976 :
977 26 : return 0;
978 26 : }
979 :
980 : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
981 : * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
982 : */
983 : static inline void
984 58 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
985 : const struct spdk_nvme_cpl *cpl)
986 : {
987 58 : spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
988 : (uintptr_t)bdev_io);
989 58 : if (cpl) {
990 29 : spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
991 29 : } else {
992 29 : spdk_bdev_io_complete(bdev_io, status);
993 : }
994 58 : }
995 :
996 : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
997 :
/* I/O channel destroy callback for an nvme_bdev: abort every I/O still waiting in
 * the retry list, then delete all io_paths of the channel.
 */
static void
bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
{
	struct nvme_bdev_channel *channel = ctx_buf;

	bdev_nvme_abort_retry_ios(channel);
	_bdev_nvme_delete_io_paths(channel);
}
1006 :
1007 : static inline bool
1008 62 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
1009 : {
1010 62 : switch (io_type) {
1011 : case SPDK_BDEV_IO_TYPE_RESET:
1012 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
1013 : case SPDK_BDEV_IO_TYPE_ABORT:
1014 5 : return true;
1015 : default:
1016 57 : break;
1017 : }
1018 :
1019 57 : return false;
1020 62 : }
1021 :
1022 : static inline bool
1023 98 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
1024 : {
1025 98 : if (spdk_unlikely(nvme_ns->ana_state_updating)) {
1026 1 : return false;
1027 : }
1028 :
1029 97 : if (spdk_unlikely(nvme_ns->ns == NULL)) {
1030 0 : return false;
1031 : }
1032 :
1033 97 : return true;
1034 98 : }
1035 :
1036 : static inline bool
1037 86 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
1038 : {
1039 86 : if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
1040 1 : return false;
1041 : }
1042 :
1043 85 : switch (nvme_ns->ana_state) {
1044 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1045 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1046 76 : return true;
1047 : default:
1048 9 : break;
1049 : }
1050 :
1051 9 : return false;
1052 86 : }
1053 :
1054 : static inline bool
1055 128 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
1056 : {
1057 128 : if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
1058 23 : return false;
1059 : }
1060 :
1061 105 : if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
1062 : SPDK_NVME_QPAIR_FAILURE_NONE)) {
1063 2 : return false;
1064 : }
1065 :
1066 103 : if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
1067 0 : return false;
1068 : }
1069 :
1070 103 : return true;
1071 128 : }
1072 :
1073 : static inline bool
1074 102 : nvme_io_path_is_available(struct nvme_io_path *io_path)
1075 : {
1076 102 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
1077 16 : return false;
1078 : }
1079 :
1080 86 : if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
1081 10 : return false;
1082 : }
1083 :
1084 76 : return true;
1085 102 : }
1086 :
1087 : static inline bool
1088 9 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
1089 : {
1090 9 : if (nvme_ctrlr->destruct) {
1091 0 : return true;
1092 : }
1093 :
1094 9 : if (nvme_ctrlr->fast_io_fail_timedout) {
1095 2 : return true;
1096 : }
1097 :
1098 7 : if (nvme_ctrlr->resetting) {
1099 5 : if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
1100 5 : return false;
1101 : } else {
1102 0 : return true;
1103 : }
1104 : }
1105 :
1106 2 : if (nvme_ctrlr->reconnect_is_delayed) {
1107 2 : return false;
1108 : }
1109 :
1110 0 : if (nvme_ctrlr->disabled) {
1111 0 : return true;
1112 : }
1113 :
1114 0 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
1115 0 : return true;
1116 : } else {
1117 0 : return false;
1118 : }
1119 9 : }
1120 :
1121 : static bool
1122 20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
1123 : {
1124 20 : if (nvme_ctrlr->destruct) {
1125 0 : return false;
1126 : }
1127 :
1128 20 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
1129 3 : return false;
1130 : }
1131 :
1132 17 : if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
1133 1 : return false;
1134 : }
1135 :
1136 16 : if (nvme_ctrlr->disabled) {
1137 0 : return false;
1138 : }
1139 :
1140 16 : return true;
1141 20 : }
1142 :
1143 : /* Simulate circular linked list. */
1144 : static inline struct nvme_io_path *
1145 99 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
1146 : {
1147 : struct nvme_io_path *next_path;
1148 :
1149 99 : if (prev_path != NULL) {
1150 39 : next_path = STAILQ_NEXT(prev_path, stailq);
1151 39 : if (next_path != NULL) {
1152 14 : return next_path;
1153 : }
1154 25 : }
1155 :
1156 85 : return STAILQ_FIRST(&nbdev_ch->io_path_list);
1157 99 : }
1158 :
1159 : static struct nvme_io_path *
1160 67 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
1161 : {
1162 67 : struct nvme_io_path *io_path, *start, *non_optimized = NULL;
1163 :
1164 67 : start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
1165 :
1166 67 : io_path = start;
1167 67 : do {
1168 79 : if (spdk_likely(nvme_io_path_is_available(io_path))) {
1169 57 : switch (io_path->nvme_ns->ana_state) {
1170 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1171 47 : nbdev_ch->current_io_path = io_path;
1172 47 : return io_path;
1173 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1174 10 : if (non_optimized == NULL) {
1175 7 : non_optimized = io_path;
1176 7 : }
1177 10 : break;
1178 : default:
1179 0 : assert(false);
1180 : break;
1181 : }
1182 10 : }
1183 32 : io_path = nvme_io_path_get_next(nbdev_ch, io_path);
1184 32 : } while (io_path != start);
1185 :
1186 20 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
1187 : /* We come here only if there is no optimized path. Cache even non_optimized
1188 : * path for load balance across multiple non_optimized paths.
1189 : */
1190 1 : nbdev_ch->current_io_path = non_optimized;
1191 1 : }
1192 :
1193 20 : return non_optimized;
1194 67 : }
1195 :
1196 : static struct nvme_io_path *
1197 4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
1198 : {
1199 : struct nvme_io_path *io_path;
1200 4 : struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
1201 4 : uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
1202 : uint32_t num_outstanding_reqs;
1203 :
1204 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1205 12 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
1206 : /* The device is currently resetting. */
1207 0 : continue;
1208 : }
1209 :
1210 12 : if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
1211 0 : continue;
1212 : }
1213 :
1214 12 : num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
1215 12 : switch (io_path->nvme_ns->ana_state) {
1216 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1217 6 : if (num_outstanding_reqs < opt_min_qd) {
1218 5 : opt_min_qd = num_outstanding_reqs;
1219 5 : optimized = io_path;
1220 5 : }
1221 6 : break;
1222 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1223 3 : if (num_outstanding_reqs < non_opt_min_qd) {
1224 3 : non_opt_min_qd = num_outstanding_reqs;
1225 3 : non_optimized = io_path;
1226 3 : }
1227 3 : break;
1228 : default:
1229 3 : break;
1230 : }
1231 12 : }
1232 :
1233 : /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
1234 4 : if (optimized != NULL) {
1235 3 : return optimized;
1236 : }
1237 :
1238 1 : return non_optimized;
1239 4 : }
1240 :
1241 : static inline struct nvme_io_path *
1242 105 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
1243 : {
1244 105 : if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
1245 41 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
1246 31 : return nbdev_ch->current_io_path;
1247 10 : } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1248 10 : if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
1249 3 : return nbdev_ch->current_io_path;
1250 : }
1251 7 : nbdev_ch->rr_counter = 0;
1252 7 : }
1253 7 : }
1254 :
1255 71 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
1256 14 : nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1257 67 : return _bdev_nvme_find_io_path(nbdev_ch);
1258 : } else {
1259 4 : return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
1260 : }
1261 105 : }
1262 :
1263 : /* Return true if there is any io_path whose qpair is active or ctrlr is not failed,
1264 : * or false otherwise.
1265 : *
1266 : * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
1267 : * is likely to be non-accessible now but may become accessible.
1268 : *
1269 : * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
1270 : * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
1271 : * when starting to reset it but it is set to failed when the reset failed. Hence, if
1272 : * a ctrlr is unfailed, it is likely that it works fine or is resetting.
1273 : */
1274 : static bool
1275 15 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
1276 : {
1277 : struct nvme_io_path *io_path;
1278 :
1279 15 : if (nbdev_ch->resetting) {
1280 1 : return false;
1281 : }
1282 :
1283 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1284 14 : if (io_path->nvme_ns->ana_transition_timedout) {
1285 0 : continue;
1286 : }
1287 :
1288 14 : if (nvme_qpair_is_connected(io_path->qpair) ||
1289 9 : !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
1290 12 : return true;
1291 : }
1292 2 : }
1293 :
1294 2 : return false;
1295 15 : }
1296 :
1297 : static void
1298 14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
1299 : {
1300 14 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1301 : struct spdk_io_channel *ch;
1302 :
1303 14 : if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
1304 3 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
1305 3 : } else {
1306 11 : ch = spdk_io_channel_from_ctx(nbdev_ch);
1307 11 : bdev_nvme_submit_request(ch, bdev_io);
1308 : }
1309 14 : }
1310 :
1311 : static int
1312 14 : bdev_nvme_retry_ios(void *arg)
1313 : {
1314 14 : struct nvme_bdev_channel *nbdev_ch = arg;
1315 : struct nvme_bdev_io *bio, *tmp_bio;
1316 : uint64_t now, delay_us;
1317 :
1318 14 : now = spdk_get_ticks();
1319 :
1320 28 : TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
1321 15 : if (bio->retry_ticks > now) {
1322 1 : break;
1323 : }
1324 :
1325 14 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
1326 :
1327 14 : bdev_nvme_retry_io(nbdev_ch, spdk_bdev_io_from_ctx(bio));
1328 14 : }
1329 :
1330 14 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1331 :
1332 14 : bio = TAILQ_FIRST(&nbdev_ch->retry_io_list);
1333 14 : if (bio != NULL) {
1334 4 : delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
1335 :
1336 4 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1337 : delay_us);
1338 4 : }
1339 :
1340 14 : return SPDK_POLLER_BUSY;
1341 : }
1342 :
1343 : static void
1344 16 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
1345 : struct nvme_bdev_io *bio, uint64_t delay_ms)
1346 : {
1347 : struct nvme_bdev_io *tmp_bio;
1348 :
1349 16 : bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
1350 :
1351 16 : TAILQ_FOREACH_REVERSE(tmp_bio, &nbdev_ch->retry_io_list, retry_io_head, retry_link) {
1352 1 : if (tmp_bio->retry_ticks <= bio->retry_ticks) {
1353 1 : TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bio, bio,
1354 : retry_link);
1355 1 : return;
1356 : }
1357 0 : }
1358 :
1359 : /* No earlier I/Os were found. This I/O must be the new head. */
1360 15 : TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bio, retry_link);
1361 :
1362 15 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1363 :
1364 15 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1365 : delay_ms * 1000ULL);
1366 16 : }
1367 :
1368 : static void
1369 58 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
1370 : {
1371 : struct nvme_bdev_io *bio, *tmp_bio;
1372 :
1373 59 : TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
1374 1 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
1375 1 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1376 1 : }
1377 :
1378 58 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1379 58 : }
1380 :
1381 : static int
1382 6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
1383 : struct nvme_bdev_io *bio_to_abort)
1384 : {
1385 : struct nvme_bdev_io *bio;
1386 :
1387 6 : TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
1388 1 : if (bio == bio_to_abort) {
1389 1 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
1390 1 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1391 1 : return 0;
1392 : }
1393 0 : }
1394 :
1395 5 : return -ENOENT;
1396 6 : }
1397 :
1398 : static void
1399 12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
1400 : {
1401 : struct nvme_bdev *nbdev;
1402 : uint16_t sct, sc;
1403 :
1404 12 : assert(spdk_nvme_cpl_is_error(cpl));
1405 :
1406 12 : nbdev = bdev_io->bdev->ctxt;
1407 :
1408 12 : if (nbdev->err_stat == NULL) {
1409 12 : return;
1410 : }
1411 :
1412 0 : sct = cpl->status.sct;
1413 0 : sc = cpl->status.sc;
1414 :
1415 0 : pthread_mutex_lock(&nbdev->mutex);
1416 :
1417 0 : nbdev->err_stat->status_type[sct]++;
1418 0 : switch (sct) {
1419 : case SPDK_NVME_SCT_GENERIC:
1420 : case SPDK_NVME_SCT_COMMAND_SPECIFIC:
1421 : case SPDK_NVME_SCT_MEDIA_ERROR:
1422 : case SPDK_NVME_SCT_PATH:
1423 0 : nbdev->err_stat->status[sct][sc]++;
1424 0 : break;
1425 : default:
1426 0 : break;
1427 : }
1428 :
1429 0 : pthread_mutex_unlock(&nbdev->mutex);
1430 12 : }
1431 :
1432 : static inline void
1433 20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
1434 : {
1435 20 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1436 20 : uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
1437 20 : uint32_t blocklen = bdev_io->bdev->blocklen;
1438 : struct spdk_bdev_io_stat *stat;
1439 : uint64_t tsc_diff;
1440 :
1441 20 : if (bio->io_path->stat == NULL) {
1442 20 : return;
1443 : }
1444 :
1445 0 : tsc_diff = spdk_get_ticks() - bio->submit_tsc;
1446 0 : stat = bio->io_path->stat;
1447 :
1448 0 : switch (bdev_io->type) {
1449 : case SPDK_BDEV_IO_TYPE_READ:
1450 0 : stat->bytes_read += num_blocks * blocklen;
1451 0 : stat->num_read_ops++;
1452 0 : stat->read_latency_ticks += tsc_diff;
1453 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1454 0 : stat->max_read_latency_ticks = tsc_diff;
1455 0 : }
1456 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1457 0 : stat->min_read_latency_ticks = tsc_diff;
1458 0 : }
1459 0 : break;
1460 : case SPDK_BDEV_IO_TYPE_WRITE:
1461 0 : stat->bytes_written += num_blocks * blocklen;
1462 0 : stat->num_write_ops++;
1463 0 : stat->write_latency_ticks += tsc_diff;
1464 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1465 0 : stat->max_write_latency_ticks = tsc_diff;
1466 0 : }
1467 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1468 0 : stat->min_write_latency_ticks = tsc_diff;
1469 0 : }
1470 0 : break;
1471 : case SPDK_BDEV_IO_TYPE_UNMAP:
1472 0 : stat->bytes_unmapped += num_blocks * blocklen;
1473 0 : stat->num_unmap_ops++;
1474 0 : stat->unmap_latency_ticks += tsc_diff;
1475 0 : if (stat->max_unmap_latency_ticks < tsc_diff) {
1476 0 : stat->max_unmap_latency_ticks = tsc_diff;
1477 0 : }
1478 0 : if (stat->min_unmap_latency_ticks > tsc_diff) {
1479 0 : stat->min_unmap_latency_ticks = tsc_diff;
1480 0 : }
1481 0 : break;
1482 : case SPDK_BDEV_IO_TYPE_ZCOPY:
1483 : /* Track the data in the start phase only */
1484 0 : if (!bdev_io->u.bdev.zcopy.start) {
1485 0 : break;
1486 : }
1487 0 : if (bdev_io->u.bdev.zcopy.populate) {
1488 0 : stat->bytes_read += num_blocks * blocklen;
1489 0 : stat->num_read_ops++;
1490 0 : stat->read_latency_ticks += tsc_diff;
1491 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1492 0 : stat->max_read_latency_ticks = tsc_diff;
1493 0 : }
1494 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1495 0 : stat->min_read_latency_ticks = tsc_diff;
1496 0 : }
1497 0 : } else {
1498 0 : stat->bytes_written += num_blocks * blocklen;
1499 0 : stat->num_write_ops++;
1500 0 : stat->write_latency_ticks += tsc_diff;
1501 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1502 0 : stat->max_write_latency_ticks = tsc_diff;
1503 0 : }
1504 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1505 0 : stat->min_write_latency_ticks = tsc_diff;
1506 0 : }
1507 : }
1508 0 : break;
1509 : case SPDK_BDEV_IO_TYPE_COPY:
1510 0 : stat->bytes_copied += num_blocks * blocklen;
1511 0 : stat->num_copy_ops++;
1512 0 : stat->copy_latency_ticks += tsc_diff;
1513 0 : if (stat->max_copy_latency_ticks < tsc_diff) {
1514 0 : stat->max_copy_latency_ticks = tsc_diff;
1515 0 : }
1516 0 : if (stat->min_copy_latency_ticks > tsc_diff) {
1517 0 : stat->min_copy_latency_ticks = tsc_diff;
1518 0 : }
1519 0 : break;
1520 : default:
1521 0 : break;
1522 : }
1523 20 : }
1524 :
1525 : static bool
1526 11 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
1527 : const struct spdk_nvme_cpl *cpl,
1528 : struct nvme_bdev_channel *nbdev_ch,
1529 : uint64_t *_delay_ms)
1530 : {
1531 11 : struct nvme_io_path *io_path = bio->io_path;
1532 11 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
1533 : const struct spdk_nvme_ctrlr_data *cdata;
1534 :
1535 15 : if (spdk_nvme_cpl_is_path_error(cpl) ||
1536 5 : spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
1537 0 : !nvme_io_path_is_available(io_path) ||
1538 4 : !nvme_ctrlr_is_available(nvme_ctrlr)) {
1539 15 : bdev_nvme_clear_current_io_path(nbdev_ch);
1540 15 : bio->io_path = NULL;
1541 15 : if (spdk_nvme_cpl_is_ana_error(cpl)) {
1542 1 : if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
1543 1 : io_path->nvme_ns->ana_state_updating = true;
1544 1 : }
1545 1 : }
1546 3 : if (!any_io_path_may_become_available(nbdev_ch)) {
1547 0 : return false;
1548 : }
1549 3 : *_delay_ms = 0;
1550 3 : } else {
1551 4 : bio->retry_count++;
1552 :
1553 4 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
1554 :
1555 4 : if (cpl->status.crd != 0) {
1556 1 : *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
1557 1 : } else {
1558 3 : *_delay_ms = 0;
1559 : }
1560 : }
1561 :
1562 7 : return true;
1563 7 : }
1564 :
1565 : static inline void
1566 40 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
1567 : const struct spdk_nvme_cpl *cpl)
1568 : {
1569 40 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1570 : struct nvme_bdev_channel *nbdev_ch;
1571 : uint64_t delay_ms;
1572 :
1573 40 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1574 :
1575 40 : if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
1576 20 : bdev_nvme_update_io_path_stat(bio);
1577 20 : goto complete;
1578 : }
1579 :
1580 : /* Update error counts before deciding if retry is needed.
1581 : * Hence, error counts may be more than the number of I/O errors.
1582 : */
1583 20 : bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
1584 :
1585 27 : if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
1586 2 : (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
1587 23 : goto complete;
1588 : }
1589 :
1590 : /* At this point we don't know whether the sequence was successfully executed or not, so we
1591 : * cannot retry the IO */
1592 7 : if (bdev_io->u.bdev.accel_sequence != NULL) {
1593 0 : goto complete;
1594 : }
1595 :
1596 7 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1597 :
1598 7 : if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
1599 7 : bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
1600 7 : return;
1601 : }
1602 :
1603 : complete:
1604 25 : bio->retry_count = 0;
1605 25 : bio->submit_tsc = 0;
1606 25 : bdev_io->u.bdev.accel_sequence = NULL;
1607 25 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
1608 32 : }
1609 :
1610 : static inline void
1611 13 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
1612 : {
1613 13 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1614 : struct nvme_bdev_channel *nbdev_ch;
1615 : enum spdk_bdev_io_status io_status;
1616 :
1617 13 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1618 :
1619 13 : switch (rc) {
1620 : case 0:
1621 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1622 1 : break;
1623 : case -ENOMEM:
1624 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1625 0 : break;
1626 : case -ENXIO:
1627 15 : if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
1628 12 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1629 :
1630 12 : bdev_nvme_clear_current_io_path(nbdev_ch);
1631 12 : bio->io_path = NULL;
1632 :
1633 12 : if (any_io_path_may_become_available(nbdev_ch)) {
1634 9 : bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
1635 9 : return;
1636 : }
1637 3 : }
1638 :
1639 : /* fallthrough */
1640 : default:
1641 3 : spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
1642 3 : bdev_io->u.bdev.accel_sequence = NULL;
1643 3 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1644 3 : break;
1645 : }
1646 :
1647 4 : bio->retry_count = 0;
1648 4 : bio->submit_tsc = 0;
1649 4 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1650 13 : }
1651 :
1652 : static inline void
1653 4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
1654 : {
1655 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1656 : enum spdk_bdev_io_status io_status;
1657 :
1658 4 : switch (rc) {
1659 : case 0:
1660 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1661 1 : break;
1662 : case -ENOMEM:
1663 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1664 0 : break;
1665 1 : case -ENXIO:
1666 : /* fallthrough */
1667 : default:
1668 3 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1669 3 : break;
1670 : }
1671 :
1672 4 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1673 4 : }
1674 :
1675 : static void
1676 3 : bdev_nvme_clear_io_path_caches_done(struct nvme_ctrlr *nvme_ctrlr,
1677 : void *ctx, int status)
1678 : {
1679 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1680 :
1681 3 : assert(nvme_ctrlr->io_path_cache_clearing == true);
1682 3 : nvme_ctrlr->io_path_cache_clearing = false;
1683 :
1684 3 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
1685 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1686 3 : return;
1687 : }
1688 :
1689 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1690 :
1691 0 : nvme_ctrlr_unregister(nvme_ctrlr);
1692 3 : }
1693 :
1694 : static void
1695 416 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
1696 : {
1697 : struct nvme_io_path *io_path;
1698 :
1699 651 : TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
1700 235 : if (io_path->nbdev_ch == NULL) {
1701 72 : continue;
1702 : }
1703 163 : bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
1704 163 : }
1705 416 : }
1706 :
1707 : static void
1708 1 : bdev_nvme_clear_io_path_cache(struct nvme_ctrlr_channel_iter *i,
1709 : struct nvme_ctrlr *nvme_ctrlr,
1710 : struct nvme_ctrlr_channel *ctrlr_ch,
1711 : void *ctx)
1712 : {
1713 1 : assert(ctrlr_ch->qpair != NULL);
1714 :
1715 1 : _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
1716 :
1717 1 : nvme_ctrlr_for_each_channel_continue(i, 0);
1718 1 : }
1719 :
1720 : static void
1721 3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
1722 : {
1723 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1724 3 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
1725 3 : nvme_ctrlr->io_path_cache_clearing) {
1726 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1727 0 : return;
1728 : }
1729 :
1730 3 : nvme_ctrlr->io_path_cache_clearing = true;
1731 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1732 :
1733 3 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
1734 : bdev_nvme_clear_io_path_cache,
1735 : NULL,
1736 : bdev_nvme_clear_io_path_caches_done);
1737 3 : }
1738 :
1739 : static struct nvme_qpair *
1740 121 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
1741 : {
1742 : struct nvme_qpair *nvme_qpair;
1743 :
1744 138 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1745 138 : if (nvme_qpair->qpair == qpair) {
1746 121 : break;
1747 : }
1748 17 : }
1749 :
1750 121 : return nvme_qpair;
1751 : }
1752 :
1753 : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
1754 :
1755 : static void
1756 121 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
1757 : {
1758 121 : struct nvme_poll_group *group = poll_group_ctx;
1759 : struct nvme_qpair *nvme_qpair;
1760 : struct nvme_ctrlr *nvme_ctrlr;
1761 : struct nvme_ctrlr_channel *ctrlr_ch;
1762 : int status;
1763 :
1764 121 : nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
1765 121 : if (nvme_qpair == NULL) {
1766 0 : return;
1767 : }
1768 :
1769 121 : if (nvme_qpair->qpair != NULL) {
1770 121 : spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
1771 121 : nvme_qpair->qpair = NULL;
1772 121 : }
1773 :
1774 121 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1775 :
1776 121 : nvme_ctrlr = nvme_qpair->ctrlr;
1777 121 : ctrlr_ch = nvme_qpair->ctrlr_ch;
1778 :
1779 121 : if (ctrlr_ch != NULL) {
1780 74 : if (ctrlr_ch->reset_iter != NULL) {
1781 : /* We are in a full reset sequence. */
1782 69 : if (ctrlr_ch->connect_poller != NULL) {
1783 : /* qpair was failed to connect. Abort the reset sequence. */
1784 0 : NVME_CTRLR_INFOLOG(nvme_ctrlr,
1785 : "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
1786 : qpair);
1787 0 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
1788 0 : status = -1;
1789 0 : } else {
1790 : /* qpair was completed to disconnect. Just move to the next ctrlr_channel. */
1791 69 : NVME_CTRLR_INFOLOG(nvme_ctrlr,
1792 : "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
1793 : qpair);
1794 69 : status = 0;
1795 : }
1796 69 : nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, status);
1797 69 : ctrlr_ch->reset_iter = NULL;
1798 69 : } else {
1799 : /* qpair was disconnected unexpectedly. Reset controller for recovery. */
1800 5 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpair %p was disconnected and freed. reset controller.\n",
1801 : qpair);
1802 5 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
1803 : }
1804 74 : } else {
1805 : /* In this case, ctrlr_channel is already deleted. */
1806 47 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpair %p was disconnected and freed. delete nvme_qpair.\n",
1807 : qpair);
1808 47 : nvme_qpair_delete(nvme_qpair);
1809 : }
1810 121 : }
1811 :
1812 : static void
1813 0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
1814 : {
1815 : struct nvme_qpair *nvme_qpair;
1816 :
1817 0 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1818 0 : if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
1819 0 : continue;
1820 : }
1821 :
1822 0 : if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
1823 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1824 0 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1825 0 : }
1826 0 : }
1827 0 : }
1828 :
1829 : static int
1830 1209 : bdev_nvme_poll(void *arg)
1831 : {
1832 1209 : struct nvme_poll_group *group = arg;
1833 : int64_t num_completions;
1834 :
1835 1209 : if (group->collect_spin_stat && group->start_ticks == 0) {
1836 0 : group->start_ticks = spdk_get_ticks();
1837 0 : }
1838 :
1839 1209 : num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
1840 : bdev_nvme_disconnected_qpair_cb);
1841 1209 : if (group->collect_spin_stat) {
1842 0 : if (num_completions > 0) {
1843 0 : if (group->end_ticks != 0) {
1844 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
1845 0 : group->end_ticks = 0;
1846 0 : }
1847 0 : group->start_ticks = 0;
1848 0 : } else {
1849 0 : group->end_ticks = spdk_get_ticks();
1850 : }
1851 0 : }
1852 :
1853 1209 : if (spdk_unlikely(num_completions < 0)) {
1854 0 : bdev_nvme_check_io_qpairs(group);
1855 0 : }
1856 :
1857 1209 : return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
1858 : }
1859 :
1860 : static int bdev_nvme_poll_adminq(void *arg);
1861 :
1862 : static void
1863 142 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
1864 : {
1865 142 : if (spdk_interrupt_mode_is_enabled()) {
1866 0 : return;
1867 : }
1868 :
1869 142 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
1870 :
1871 142 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
1872 : nvme_ctrlr, new_period_us);
1873 142 : }
1874 :
1875 : static int
1876 202 : bdev_nvme_poll_adminq(void *arg)
1877 : {
1878 : int32_t rc;
1879 202 : struct nvme_ctrlr *nvme_ctrlr = arg;
1880 : nvme_ctrlr_disconnected_cb disconnected_cb;
1881 :
1882 202 : assert(nvme_ctrlr != NULL);
1883 :
1884 202 : rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
1885 202 : if (rc < 0) {
1886 86 : disconnected_cb = nvme_ctrlr->disconnected_cb;
1887 86 : nvme_ctrlr->disconnected_cb = NULL;
1888 :
1889 86 : if (disconnected_cb != NULL) {
1890 142 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
1891 71 : g_opts.nvme_adminq_poll_period_us);
1892 71 : disconnected_cb(nvme_ctrlr);
1893 71 : } else {
1894 15 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
1895 : }
1896 202 : } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
1897 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1898 0 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
1899 0 : }
1900 :
1901 202 : return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
1902 : }
1903 :
1904 : static void
1905 39 : nvme_bdev_free(void *io_device)
1906 : {
1907 39 : struct nvme_bdev *nvme_disk = io_device;
1908 :
1909 39 : pthread_mutex_destroy(&nvme_disk->mutex);
1910 39 : free(nvme_disk->disk.name);
1911 39 : free(nvme_disk->err_stat);
1912 39 : free(nvme_disk);
1913 39 : }
1914 :
1915 : static int
1916 38 : bdev_nvme_destruct(void *ctx)
1917 : {
1918 38 : struct nvme_bdev *nvme_disk = ctx;
1919 : struct nvme_ns *nvme_ns, *tmp_nvme_ns;
1920 :
1921 : SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);
1922 :
1923 77 : TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
1924 39 : pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
1925 :
1926 39 : nvme_ns->bdev = NULL;
1927 :
1928 39 : assert(nvme_ns->id > 0);
1929 :
1930 39 : if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
1931 0 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1932 :
1933 0 : nvme_ctrlr_put_ref(nvme_ns->ctrlr);
1934 0 : nvme_ns_free(nvme_ns);
1935 0 : } else {
1936 39 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1937 : }
1938 39 : }
1939 :
1940 38 : pthread_mutex_lock(&g_bdev_nvme_mutex);
1941 38 : TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
1942 38 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
1943 :
1944 38 : spdk_io_device_unregister(nvme_disk, nvme_bdev_free);
1945 :
1946 38 : return 0;
1947 : }
1948 :
1949 : static int
1950 122 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
1951 : {
1952 : struct nvme_ctrlr *nvme_ctrlr;
1953 : struct spdk_nvme_io_qpair_opts opts;
1954 : struct spdk_nvme_qpair *qpair;
1955 : int rc;
1956 :
1957 122 : nvme_ctrlr = nvme_qpair->ctrlr;
1958 :
1959 122 : spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1960 122 : opts.create_only = true;
1961 : /* In interrupt mode qpairs must be created in sync mode, else it will never be connected.
1962 : * delay_cmd_submit must be false as in interrupt mode requests cannot be submitted in
1963 : * completion context.
1964 : */
1965 122 : if (!spdk_interrupt_mode_is_enabled()) {
1966 122 : opts.async_mode = true;
1967 122 : opts.delay_cmd_submit = g_opts.delay_cmd_submit;
1968 122 : }
1969 122 : opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
1970 122 : g_opts.io_queue_requests = opts.io_queue_requests;
1971 :
1972 122 : qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1973 122 : if (qpair == NULL) {
1974 0 : return -1;
1975 : }
1976 :
1977 : SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
1978 : spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
1979 :
1980 122 : assert(nvme_qpair->group != NULL);
1981 :
1982 122 : rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
1983 122 : if (rc != 0) {
1984 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to begin polling on NVMe Channel.\n");
1985 0 : goto err;
1986 : }
1987 :
1988 122 : rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
1989 122 : if (rc != 0) {
1990 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to connect I/O qpair.\n");
1991 0 : goto err;
1992 : }
1993 :
1994 122 : nvme_qpair->qpair = qpair;
1995 :
1996 122 : if (!g_opts.disable_auto_failback) {
1997 85 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1998 85 : }
1999 :
2000 122 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Connecting qpair %p:%u started.\n",
2001 : qpair, spdk_nvme_qpair_get_id(qpair));
2002 :
2003 122 : return 0;
2004 :
2005 : err:
2006 0 : spdk_nvme_ctrlr_free_io_qpair(qpair);
2007 :
2008 0 : return rc;
2009 122 : }
2010 :
2011 : static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);
2012 :
2013 : static void
2014 71 : bdev_nvme_complete_pending_resets(struct nvme_ctrlr *nvme_ctrlr, bool success)
2015 : {
2016 71 : int rc = 0;
2017 : struct nvme_bdev_io *bio;
2018 :
2019 71 : if (!success) {
2020 33 : rc = -1;
2021 33 : }
2022 :
2023 83 : while (!TAILQ_EMPTY(&nvme_ctrlr->pending_resets)) {
2024 12 : bio = TAILQ_FIRST(&nvme_ctrlr->pending_resets);
2025 12 : TAILQ_REMOVE(&nvme_ctrlr->pending_resets, bio, retry_link);
2026 :
2027 12 : bdev_nvme_reset_io_continue(bio, rc);
2028 : }
2029 71 : }
2030 :
2031 : /* This function marks the current trid as failed by storing the current ticks
2032 : * and then sets the next trid to the active trid within a controller if exists.
2033 : *
2034 : * The purpose of the boolean return value is to request the caller to disconnect
2035 : * the current trid now to try connecting the next trid.
2036 : */
2037 : static bool
2038 62 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
2039 : {
2040 : struct nvme_path_id *path_id, *next_path;
2041 : int rc __attribute__((unused));
2042 :
2043 62 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
2044 62 : assert(path_id);
2045 62 : assert(path_id == nvme_ctrlr->active_path_id);
2046 62 : next_path = TAILQ_NEXT(path_id, link);
2047 :
2048 : /* Update the last failed time. It means the trid is failed if its last
2049 : * failed time is non-zero.
2050 : */
2051 62 : path_id->last_failed_tsc = spdk_get_ticks();
2052 :
2053 62 : if (next_path == NULL) {
2054 : /* There is no alternate trid within a controller. */
2055 51 : return false;
2056 : }
2057 :
2058 11 : if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
2059 : /* Connect is not retried in a controller reset sequence. Connecting
2060 : * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
2061 : */
2062 3 : return false;
2063 : }
2064 :
2065 8 : assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
2066 :
2067 8 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Start failover from %s:%s to %s:%s\n",
2068 : path_id->trid.traddr, path_id->trid.trsvcid,
2069 : next_path->trid.traddr, next_path->trid.trsvcid);
2070 :
2071 8 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
2072 8 : nvme_ctrlr->active_path_id = next_path;
2073 8 : rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
2074 8 : assert(rc == 0);
2075 8 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
2076 8 : if (!remove) {
2077 : /** Shuffle the old trid to the end of the list and use the new one.
2078 : * Allows for round robin through multiple connections.
2079 : */
2080 6 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
2081 6 : } else {
2082 2 : free(path_id);
2083 : }
2084 :
2085 8 : if (start || next_path->last_failed_tsc == 0) {
2086 : /* bdev_nvme_failover_ctrlr() is just called or the next trid is not failed
2087 : * or used yet. Try the next trid now.
2088 : */
2089 7 : return true;
2090 : }
2091 :
2092 2 : if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
2093 1 : nvme_ctrlr->opts.reconnect_delay_sec) {
2094 : /* Enough backoff passed since the next trid failed. Try the next trid now. */
2095 0 : return true;
2096 : }
2097 :
2098 : /* The next trid will be tried after reconnect_delay_sec seconds. */
2099 1 : return false;
2100 62 : }
2101 :
2102 : static bool
2103 89 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
2104 : {
2105 : int32_t elapsed;
2106 :
2107 89 : if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
2108 37 : nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
2109 63 : return false;
2110 : }
2111 :
2112 26 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
2113 26 : if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
2114 6 : return true;
2115 : } else {
2116 20 : return false;
2117 : }
2118 89 : }
2119 :
2120 : static bool
2121 12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
2122 : {
2123 : uint32_t elapsed;
2124 :
2125 12 : if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
2126 8 : return false;
2127 : }
2128 :
2129 4 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
2130 4 : if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
2131 2 : return true;
2132 : } else {
2133 2 : return false;
2134 : }
2135 12 : }
2136 :
2137 : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
2138 :
2139 : static void
2140 72 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
2141 : {
2142 : int rc;
2143 :
2144 72 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start disconnecting ctrlr.\n");
2145 :
2146 72 : rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
2147 72 : if (rc != 0) {
2148 1 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "disconnecting ctrlr failed.\n");
2149 :
2150 : /* Disconnect fails if ctrlr is already resetting or removed. In this case,
2151 : * fail the reset sequence immediately.
2152 : */
2153 1 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2154 1 : return;
2155 : }
2156 :
2157 : /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
2158 : * Set callback here to execute the specified operation after ctrlr is really disconnected.
2159 : */
2160 71 : assert(nvme_ctrlr->disconnected_cb == NULL);
2161 71 : nvme_ctrlr->disconnected_cb = cb_fn;
2162 :
2163 : /* During disconnection, reduce the period to poll adminq more often. */
2164 71 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
2165 72 : }
2166 :
/* Follow-up operation to perform after a controller reset (or disable)
 * sequence has completed.
 */
typedef enum bdev_nvme_op_after_reset {
	/* Nothing further to do. */
	OP_NONE = 0,
	/* The controller can be unregistered: complete the pending destruct. */
	OP_COMPLETE_PENDING_DESTRUCT = 1,
	/* The controller loss timeout expired: delete the controller. */
	OP_DESTRUCT = 2,
	/* Disconnect and retry the connection after the reconnect delay. */
	OP_DELAYED_RECONNECT = 3,
	/* A failover was requested while resetting: start it now. */
	OP_FAILOVER = 4,
} _bdev_nvme_op_after_reset;
2176 :
2177 : static _bdev_nvme_op_after_reset
2178 71 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
2179 : {
2180 71 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
2181 : /* Complete pending destruct after reset completes. */
2182 0 : return OP_COMPLETE_PENDING_DESTRUCT;
2183 71 : } else if (nvme_ctrlr->pending_failover) {
2184 3 : nvme_ctrlr->pending_failover = false;
2185 3 : nvme_ctrlr->reset_start_tsc = 0;
2186 3 : return OP_FAILOVER;
2187 68 : } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
2188 54 : nvme_ctrlr->reset_start_tsc = 0;
2189 54 : return OP_NONE;
2190 14 : } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
2191 2 : return OP_DESTRUCT;
2192 : } else {
2193 12 : if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
2194 2 : nvme_ctrlr->fast_io_fail_timedout = true;
2195 2 : }
2196 12 : return OP_DELAYED_RECONNECT;
2197 : }
2198 71 : }
2199 :
2200 : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
2201 : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
2202 :
2203 : static int
2204 9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
2205 : {
2206 9 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2207 :
2208 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
2209 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2210 :
2211 9 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2212 :
2213 9 : if (!nvme_ctrlr->reconnect_is_delayed) {
2214 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2215 0 : return SPDK_POLLER_BUSY;
2216 : }
2217 :
2218 9 : nvme_ctrlr->reconnect_is_delayed = false;
2219 :
2220 9 : if (nvme_ctrlr->destruct) {
2221 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2222 0 : return SPDK_POLLER_BUSY;
2223 : }
2224 :
2225 9 : assert(nvme_ctrlr->resetting == false);
2226 9 : nvme_ctrlr->resetting = true;
2227 :
2228 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2229 :
2230 9 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2231 :
2232 9 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2233 9 : return SPDK_POLLER_BUSY;
2234 9 : }
2235 :
2236 : static void
2237 12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
2238 : {
2239 12 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2240 :
2241 12 : assert(nvme_ctrlr->reconnect_is_delayed == false);
2242 12 : nvme_ctrlr->reconnect_is_delayed = true;
2243 :
2244 12 : assert(nvme_ctrlr->reconnect_delay_timer == NULL);
2245 12 : nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
2246 : nvme_ctrlr,
2247 : nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
2248 12 : }
2249 :
2250 : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
2251 :
2252 : static void
2253 71 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
2254 : {
2255 71 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2256 71 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2257 : enum bdev_nvme_op_after_reset op_after_reset;
2258 :
2259 71 : assert(nvme_ctrlr->thread == spdk_get_thread());
2260 :
2261 71 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2262 71 : if (!success) {
2263 : /* Connecting the active trid failed. Set the next alternate trid to the
2264 : * active trid if it exists.
2265 : */
2266 35 : if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
2267 : /* The next alternate trid exists and is ready to try. Try it now. */
2268 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2269 :
2270 2 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Try the next alternate trid %s:%s now.\n",
2271 : nvme_ctrlr->active_path_id->trid.traddr,
2272 : nvme_ctrlr->active_path_id->trid.trsvcid);
2273 :
2274 2 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2275 2 : return;
2276 : }
2277 :
2278 : /* We came here if there is no alternate trid or if the next trid exists but
2279 : * is not ready to try. We will try the active trid after reconnect_delay_sec
2280 : * seconds if it is non-zero or at the next reset call otherwise.
2281 : */
2282 33 : } else {
2283 : /* Connecting the active trid succeeded. Clear the last failed time because it
2284 : * means the trid is failed if its last failed time is non-zero.
2285 : */
2286 36 : nvme_ctrlr->active_path_id->last_failed_tsc = 0;
2287 : }
2288 :
2289 69 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Clear pending resets.\n");
2290 :
2291 : /* Make sure we clear any pending resets before returning. */
2292 69 : bdev_nvme_complete_pending_resets(nvme_ctrlr, success);
2293 :
2294 69 : if (!success) {
2295 33 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Resetting controller failed.\n");
2296 33 : } else {
2297 36 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Resetting controller successful.\n");
2298 : }
2299 :
2300 69 : nvme_ctrlr->resetting = false;
2301 69 : nvme_ctrlr->dont_retry = false;
2302 69 : nvme_ctrlr->in_failover = false;
2303 :
2304 69 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2305 69 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2306 :
2307 69 : op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
2308 69 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2309 :
2310 : /* Delay callbacks when the next operation is a failover. */
2311 69 : if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
2312 17 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
2313 17 : }
2314 :
2315 69 : switch (op_after_reset) {
2316 : case OP_COMPLETE_PENDING_DESTRUCT:
2317 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2318 0 : break;
2319 : case OP_DESTRUCT:
2320 2 : bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
2321 2 : remove_discovery_entry(nvme_ctrlr);
2322 2 : break;
2323 : case OP_DELAYED_RECONNECT:
2324 12 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
2325 12 : break;
2326 : case OP_FAILOVER:
2327 3 : nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
2328 3 : nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
2329 3 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
2330 3 : break;
2331 : default:
2332 52 : break;
2333 : }
2334 71 : }
2335 :
/*
 * Completion callback used after the qpairs were destroyed again because
 * creating them failed: complete the reset sequence as failed.
 * ctx and status are not used.
 */
static void
bdev_nvme_reset_create_qpairs_failed(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
{
	(void)ctx;
	(void)status;

	bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
}
2341 :
2342 : static void
2343 104 : bdev_nvme_reset_destroy_qpair(struct nvme_ctrlr_channel_iter *i,
2344 : struct nvme_ctrlr *nvme_ctrlr,
2345 : struct nvme_ctrlr_channel *ctrlr_ch, void *ctx)
2346 : {
2347 : struct nvme_qpair *nvme_qpair;
2348 : struct spdk_nvme_qpair *qpair;
2349 :
2350 104 : nvme_qpair = ctrlr_ch->qpair;
2351 104 : assert(nvme_qpair != NULL);
2352 :
2353 104 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
2354 :
2355 104 : qpair = nvme_qpair->qpair;
2356 104 : if (qpair != NULL) {
2357 69 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start disconnecting qpair %p:%u.\n",
2358 : qpair, spdk_nvme_qpair_get_id(qpair));
2359 :
2360 69 : if (nvme_qpair->ctrlr->dont_retry) {
2361 53 : spdk_nvme_qpair_set_abort_dnr(qpair, true);
2362 53 : }
2363 69 : spdk_nvme_ctrlr_disconnect_io_qpair(qpair);
2364 :
2365 : /* The current full reset sequence will move to the next
2366 : * ctrlr_channel after the qpair is actually disconnected.
2367 : */
2368 69 : assert(ctrlr_ch->reset_iter == NULL);
2369 69 : ctrlr_ch->reset_iter = i;
2370 69 : } else {
2371 35 : nvme_ctrlr_for_each_channel_continue(i, 0);
2372 : }
2373 104 : }
2374 :
2375 : static void
2376 36 : bdev_nvme_reset_create_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
2377 : {
2378 36 : if (status == 0) {
2379 36 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were created after ctrlr reset.\n");
2380 :
2381 36 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
2382 36 : } else {
2383 0 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were failed to create after ctrlr reset.\n");
2384 :
2385 : /* Delete the added qpairs and quiesce ctrlr to make the states clean. */
2386 0 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2387 : bdev_nvme_reset_destroy_qpair,
2388 : NULL,
2389 : bdev_nvme_reset_create_qpairs_failed);
2390 : }
2391 36 : }
2392 :
2393 : static int
2394 61 : bdev_nvme_reset_check_qpair_connected(void *ctx)
2395 : {
2396 61 : struct nvme_ctrlr_channel *ctrlr_ch = ctx;
2397 61 : struct nvme_qpair *nvme_qpair = ctrlr_ch->qpair;
2398 : struct spdk_nvme_qpair *qpair;
2399 :
2400 61 : if (ctrlr_ch->reset_iter == NULL) {
2401 : /* qpair was already failed to connect and the reset sequence is being aborted. */
2402 0 : assert(ctrlr_ch->connect_poller == NULL);
2403 0 : assert(nvme_qpair->qpair == NULL);
2404 :
2405 0 : NVME_CTRLR_INFOLOG(nvme_qpair->ctrlr,
2406 : "qpair was already failed to connect. reset is being aborted.\n");
2407 0 : return SPDK_POLLER_BUSY;
2408 : }
2409 :
2410 61 : qpair = nvme_qpair->qpair;
2411 61 : assert(qpair != NULL);
2412 :
2413 61 : if (!spdk_nvme_qpair_is_connected(qpair)) {
2414 0 : return SPDK_POLLER_BUSY;
2415 : }
2416 :
2417 61 : NVME_CTRLR_INFOLOG(nvme_qpair->ctrlr, "qpair %p:%u was connected.\n",
2418 : qpair, spdk_nvme_qpair_get_id(qpair));
2419 :
2420 61 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
2421 :
2422 : /* qpair was completed to connect. Move to the next ctrlr_channel */
2423 61 : nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
2424 61 : ctrlr_ch->reset_iter = NULL;
2425 :
2426 61 : if (!g_opts.disable_auto_failback) {
2427 44 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
2428 44 : }
2429 :
2430 61 : return SPDK_POLLER_BUSY;
2431 61 : }
2432 :
2433 : static void
2434 61 : bdev_nvme_reset_create_qpair(struct nvme_ctrlr_channel_iter *i,
2435 : struct nvme_ctrlr *nvme_ctrlr,
2436 : struct nvme_ctrlr_channel *ctrlr_ch,
2437 : void *ctx)
2438 : {
2439 61 : struct nvme_qpair *nvme_qpair = ctrlr_ch->qpair;
2440 : struct spdk_nvme_qpair *qpair;
2441 : int rc;
2442 :
2443 61 : rc = bdev_nvme_create_qpair(nvme_qpair);
2444 61 : if (rc == 0) {
2445 61 : ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
2446 : ctrlr_ch, 0);
2447 :
2448 61 : qpair = nvme_qpair->qpair;
2449 :
2450 61 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start checking qpair %p:%u to be connected.\n",
2451 : qpair, spdk_nvme_qpair_get_id(qpair));
2452 :
2453 : /* The current full reset sequence will move to the next
2454 : * ctrlr_channel after the qpair is actually connected.
2455 : */
2456 61 : assert(ctrlr_ch->reset_iter == NULL);
2457 61 : ctrlr_ch->reset_iter = i;
2458 61 : } else {
2459 0 : nvme_ctrlr_for_each_channel_continue(i, rc);
2460 : }
2461 61 : }
2462 :
2463 : static void
2464 36 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
2465 : {
2466 36 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
2467 : struct nvme_ns *nvme_ns;
2468 :
2469 57 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
2470 57 : nvme_ns != NULL;
2471 21 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
2472 21 : if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
2473 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
2474 : /* NS can be added again. Just nullify nvme_ns->ns. */
2475 1 : nvme_ns->ns = NULL;
2476 1 : }
2477 21 : }
2478 36 : }
2479 :
2480 :
2481 : static int
2482 70 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
2483 : {
2484 70 : struct nvme_ctrlr *nvme_ctrlr = arg;
2485 : struct spdk_nvme_transport_id *trid;
2486 70 : int rc = -ETIMEDOUT;
2487 :
2488 70 : if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
2489 : /* Mark the ctrlr as failed. The next call to
2490 : * spdk_nvme_ctrlr_reconnect_poll_async() will then
2491 : * do the necessary cleanup and return failure.
2492 : */
2493 2 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
2494 2 : }
2495 :
2496 70 : rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
2497 70 : if (rc == -EAGAIN) {
2498 0 : return SPDK_POLLER_BUSY;
2499 : }
2500 :
2501 70 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
2502 70 : if (rc == 0) {
2503 36 : trid = &nvme_ctrlr->active_path_id->trid;
2504 :
2505 36 : if (spdk_nvme_trtype_is_fabrics(trid->trtype)) {
2506 36 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was connected to %s:%s. Create qpairs.\n",
2507 : trid->traddr, trid->trsvcid);
2508 36 : } else {
2509 0 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was connected. Create qpairs.\n");
2510 : }
2511 :
2512 36 : nvme_ctrlr_check_namespaces(nvme_ctrlr);
2513 :
2514 : /* Recreate all of the I/O queue pairs */
2515 36 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2516 : bdev_nvme_reset_create_qpair,
2517 : NULL,
2518 : bdev_nvme_reset_create_qpairs_done);
2519 36 : } else {
2520 34 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr could not be connected.\n");
2521 :
2522 34 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2523 : }
2524 70 : return SPDK_POLLER_BUSY;
2525 70 : }
2526 :
2527 : static void
2528 70 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2529 : {
2530 70 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start reconnecting ctrlr.\n");
2531 :
2532 70 : spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
2533 :
2534 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
2535 70 : assert(nvme_ctrlr->reset_detach_poller == NULL);
2536 70 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
2537 : nvme_ctrlr, 0);
2538 70 : }
2539 :
2540 : static void
2541 57 : bdev_nvme_reset_destroy_qpair_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
2542 : {
2543 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
2544 57 : assert(status == 0);
2545 :
2546 57 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were deleted.\n");
2547 :
2548 57 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2549 0 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2550 0 : } else {
2551 57 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2552 : }
2553 57 : }
2554 :
2555 : static void
2556 57 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2557 : {
2558 57 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Delete qpairs for reset.\n");
2559 :
2560 57 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2561 : bdev_nvme_reset_destroy_qpair,
2562 : NULL,
2563 : bdev_nvme_reset_destroy_qpair_done);
2564 57 : }
2565 :
2566 : static void
2567 3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
2568 : {
2569 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2570 :
2571 3 : assert(nvme_ctrlr->resetting == true);
2572 3 : assert(nvme_ctrlr->thread == spdk_get_thread());
2573 :
2574 3 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2575 :
2576 3 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2577 :
2578 3 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2579 3 : }
2580 :
2581 : static void
2582 57 : _bdev_nvme_reset_ctrlr(void *ctx)
2583 : {
2584 57 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2585 :
2586 57 : assert(nvme_ctrlr->resetting == true);
2587 57 : assert(nvme_ctrlr->thread == spdk_get_thread());
2588 :
2589 57 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2590 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
2591 0 : } else {
2592 57 : bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
2593 : }
2594 57 : }
2595 :
2596 : static int
2597 50 : bdev_nvme_reset_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, spdk_msg_fn *msg_fn)
2598 : {
2599 50 : if (nvme_ctrlr->destruct) {
2600 3 : return -ENXIO;
2601 : }
2602 :
2603 47 : if (nvme_ctrlr->resetting) {
2604 14 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform reset, already in progress.\n");
2605 14 : return -EBUSY;
2606 : }
2607 :
2608 33 : if (nvme_ctrlr->disabled) {
2609 1 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform reset. Controller is disabled.\n");
2610 1 : return -EALREADY;
2611 : }
2612 :
2613 32 : nvme_ctrlr->resetting = true;
2614 32 : nvme_ctrlr->dont_retry = true;
2615 :
2616 32 : if (nvme_ctrlr->reconnect_is_delayed) {
2617 1 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "Reconnect is already scheduled.\n");
2618 1 : *msg_fn = bdev_nvme_reconnect_ctrlr_now;
2619 1 : nvme_ctrlr->reconnect_is_delayed = false;
2620 1 : } else {
2621 31 : *msg_fn = _bdev_nvme_reset_ctrlr;
2622 31 : assert(nvme_ctrlr->reset_start_tsc == 0);
2623 : }
2624 :
2625 32 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2626 :
2627 32 : return 0;
2628 50 : }
2629 :
2630 : static int
2631 24 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2632 : {
2633 : spdk_msg_fn msg_fn;
2634 : int rc;
2635 :
2636 24 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2637 24 : rc = bdev_nvme_reset_ctrlr_unsafe(nvme_ctrlr, &msg_fn);
2638 24 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2639 :
2640 24 : if (rc == 0) {
2641 19 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2642 19 : }
2643 :
2644 24 : return rc;
2645 : }
2646 :
2647 : static int
2648 3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2649 : {
2650 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2651 3 : if (nvme_ctrlr->destruct) {
2652 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2653 0 : return -ENXIO;
2654 : }
2655 :
2656 3 : if (nvme_ctrlr->resetting) {
2657 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2658 0 : return -EBUSY;
2659 : }
2660 :
2661 3 : if (!nvme_ctrlr->disabled) {
2662 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2663 1 : return -EALREADY;
2664 : }
2665 :
2666 2 : nvme_ctrlr->disabled = false;
2667 2 : nvme_ctrlr->resetting = true;
2668 :
2669 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2670 :
2671 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2672 :
2673 2 : spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
2674 2 : return 0;
2675 3 : }
2676 :
2677 : static void
2678 2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
2679 : {
2680 2 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2681 2 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2682 : enum bdev_nvme_op_after_reset op_after_disable;
2683 :
2684 2 : assert(nvme_ctrlr->thread == spdk_get_thread());
2685 :
2686 2 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2687 2 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2688 :
2689 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2690 :
2691 2 : nvme_ctrlr->resetting = false;
2692 2 : nvme_ctrlr->dont_retry = false;
2693 :
2694 2 : op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
2695 :
2696 2 : nvme_ctrlr->disabled = true;
2697 2 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2698 :
2699 : /* Make sure we clear any pending resets before returning. */
2700 2 : bdev_nvme_complete_pending_resets(nvme_ctrlr, true);
2701 :
2702 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2703 :
2704 2 : if (ctrlr_op_cb_fn) {
2705 0 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
2706 0 : }
2707 :
2708 2 : switch (op_after_disable) {
2709 : case OP_COMPLETE_PENDING_DESTRUCT:
2710 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2711 0 : break;
2712 : default:
2713 2 : break;
2714 : }
2715 2 : }
2716 :
2717 : static void
2718 1 : bdev_nvme_disable_destroy_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
2719 : {
2720 1 : assert(status == 0);
2721 :
2722 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2723 0 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2724 0 : } else {
2725 1 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
2726 : }
2727 1 : }
2728 :
2729 : static void
2730 1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2731 : {
2732 1 : nvme_ctrlr_for_each_channel(nvme_ctrlr,
2733 : bdev_nvme_reset_destroy_qpair,
2734 : NULL,
2735 : bdev_nvme_disable_destroy_qpairs_done);
2736 1 : }
2737 :
2738 : static void
2739 1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
2740 : {
2741 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2742 :
2743 1 : assert(nvme_ctrlr->resetting == true);
2744 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2745 :
2746 1 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2747 :
2748 1 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2749 1 : }
2750 :
2751 : static void
2752 1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
2753 : {
2754 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2755 :
2756 1 : assert(nvme_ctrlr->resetting == true);
2757 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2758 :
2759 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2760 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
2761 0 : } else {
2762 1 : bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
2763 : }
2764 1 : }
2765 :
2766 : static int
2767 5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2768 : {
2769 : spdk_msg_fn msg_fn;
2770 :
2771 5 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2772 5 : if (nvme_ctrlr->destruct) {
2773 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2774 1 : return -ENXIO;
2775 : }
2776 :
2777 4 : if (nvme_ctrlr->resetting) {
2778 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2779 1 : return -EBUSY;
2780 : }
2781 :
2782 3 : if (nvme_ctrlr->disabled) {
2783 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2784 1 : return -EALREADY;
2785 : }
2786 :
2787 2 : nvme_ctrlr->resetting = true;
2788 2 : nvme_ctrlr->dont_retry = true;
2789 :
2790 2 : if (nvme_ctrlr->reconnect_is_delayed) {
2791 1 : msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
2792 1 : nvme_ctrlr->reconnect_is_delayed = false;
2793 1 : } else {
2794 1 : msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
2795 : }
2796 :
2797 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2798 :
2799 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2800 :
2801 2 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2802 2 : return 0;
2803 5 : }
2804 :
2805 : static int
2806 6 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2807 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2808 : {
2809 : int rc;
2810 :
2811 6 : switch (op) {
2812 : case NVME_CTRLR_OP_RESET:
2813 5 : rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
2814 5 : break;
2815 : case NVME_CTRLR_OP_ENABLE:
2816 0 : rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
2817 0 : break;
2818 : case NVME_CTRLR_OP_DISABLE:
2819 0 : rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
2820 0 : break;
2821 : default:
2822 1 : rc = -EINVAL;
2823 1 : break;
2824 : }
2825 :
2826 6 : if (rc == 0) {
2827 3 : assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
2828 3 : assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
2829 3 : nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
2830 3 : nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
2831 3 : }
2832 6 : return rc;
2833 : }
2834 :
2835 : struct nvme_ctrlr_op_rpc_ctx {
2836 : struct nvme_ctrlr *nvme_ctrlr;
2837 : struct spdk_thread *orig_thread;
2838 : enum nvme_ctrlr_op op;
2839 : int rc;
2840 : bdev_nvme_ctrlr_op_cb cb_fn;
2841 : void *cb_arg;
2842 : };
2843 :
2844 : static void
2845 4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
2846 : {
2847 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2848 :
2849 4 : assert(ctx != NULL);
2850 4 : assert(ctx->cb_fn != NULL);
2851 :
2852 4 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2853 :
2854 4 : free(ctx);
2855 4 : }
2856 :
2857 : static void
2858 4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
2859 : {
2860 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2861 :
2862 4 : ctx->rc = rc;
2863 :
2864 4 : spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
2865 4 : }
2866 :
2867 : void
2868 4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2869 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2870 : {
2871 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2872 : int rc;
2873 :
2874 4 : assert(cb_fn != NULL);
2875 :
2876 4 : ctx = calloc(1, sizeof(*ctx));
2877 4 : if (ctx == NULL) {
2878 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2879 0 : cb_fn(cb_arg, -ENOMEM);
2880 0 : return;
2881 : }
2882 :
2883 4 : ctx->orig_thread = spdk_get_thread();
2884 4 : ctx->cb_fn = cb_fn;
2885 4 : ctx->cb_arg = cb_arg;
2886 :
2887 4 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
2888 4 : if (rc == 0) {
2889 1 : return;
2890 3 : } else if (rc == -EALREADY) {
2891 0 : rc = 0;
2892 0 : }
2893 :
2894 3 : nvme_ctrlr_op_rpc_complete(ctx, rc);
2895 4 : }
2896 :
2897 : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
2898 :
2899 : static void
2900 2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
2901 : {
2902 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2903 : struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
2904 : int rc;
2905 :
2906 2 : prev_nvme_ctrlr = ctx->nvme_ctrlr;
2907 2 : ctx->nvme_ctrlr = NULL;
2908 :
2909 2 : if (ctx->rc != 0) {
2910 0 : goto complete;
2911 : }
2912 :
2913 2 : next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
2914 2 : if (next_nvme_ctrlr == NULL) {
2915 1 : goto complete;
2916 : }
2917 :
2918 1 : rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2919 1 : if (rc == 0) {
2920 1 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2921 1 : return;
2922 0 : } else if (rc == -EALREADY) {
2923 0 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2924 0 : rc = 0;
2925 0 : }
2926 :
2927 0 : ctx->rc = rc;
2928 :
2929 : complete:
2930 1 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2931 1 : free(ctx);
2932 2 : }
2933 :
2934 : static void
2935 2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
2936 : {
2937 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2938 :
2939 2 : ctx->rc = rc;
2940 :
2941 2 : spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
2942 2 : }
2943 :
2944 : void
2945 1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
2946 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2947 : {
2948 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2949 : struct nvme_ctrlr *nvme_ctrlr;
2950 : int rc;
2951 :
2952 1 : assert(cb_fn != NULL);
2953 :
2954 1 : ctx = calloc(1, sizeof(*ctx));
2955 1 : if (ctx == NULL) {
2956 0 : SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2957 0 : cb_fn(cb_arg, -ENOMEM);
2958 0 : return;
2959 : }
2960 :
2961 1 : ctx->orig_thread = spdk_get_thread();
2962 1 : ctx->op = op;
2963 1 : ctx->cb_fn = cb_fn;
2964 1 : ctx->cb_arg = cb_arg;
2965 :
2966 1 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
2967 1 : assert(nvme_ctrlr != NULL);
2968 :
2969 1 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2970 1 : if (rc == 0) {
2971 1 : ctx->nvme_ctrlr = nvme_ctrlr;
2972 1 : return;
2973 0 : } else if (rc == -EALREADY) {
2974 0 : ctx->nvme_ctrlr = nvme_ctrlr;
2975 0 : rc = 0;
2976 0 : }
2977 :
2978 0 : nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
2979 1 : }
2980 :
2981 : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
2982 :
2983 : static void
2984 16 : bdev_nvme_unfreeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
2985 : {
2986 16 : struct nvme_bdev_io *bio = ctx;
2987 : enum spdk_bdev_io_status io_status;
2988 :
2989 16 : if (bio->cpl.cdw0 == 0) {
2990 12 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
2991 12 : } else {
2992 4 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
2993 : }
2994 :
2995 16 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p completed, status:%d\n", bio, io_status);
2996 :
2997 16 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
2998 16 : }
2999 :
3000 : static void
3001 32 : bdev_nvme_unfreeze_bdev_channel(struct nvme_bdev_channel_iter *i,
3002 : struct nvme_bdev *nbdev,
3003 : struct nvme_bdev_channel *nbdev_ch, void *ctx)
3004 : {
3005 32 : bdev_nvme_abort_retry_ios(nbdev_ch);
3006 32 : nbdev_ch->resetting = false;
3007 :
3008 32 : nvme_bdev_for_each_channel_continue(i, 0);
3009 32 : }
3010 :
3011 : static void
3012 16 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
3013 : {
3014 16 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
3015 16 : struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
3016 :
3017 : /* Abort all queued I/Os for retry. */
3018 32 : nvme_bdev_for_each_channel(nbdev,
3019 : bdev_nvme_unfreeze_bdev_channel,
3020 16 : bio,
3021 : bdev_nvme_unfreeze_bdev_channel_done);
3022 16 : }
3023 :
3024 : static void
3025 26 : _bdev_nvme_reset_io_continue(void *ctx)
3026 : {
3027 26 : struct nvme_bdev_io *bio = ctx;
3028 : struct nvme_io_path *prev_io_path, *next_io_path;
3029 : int rc;
3030 :
3031 26 : prev_io_path = bio->io_path;
3032 26 : bio->io_path = NULL;
3033 :
3034 26 : next_io_path = STAILQ_NEXT(prev_io_path, stailq);
3035 26 : if (next_io_path == NULL) {
3036 16 : goto complete;
3037 : }
3038 :
3039 10 : rc = _bdev_nvme_reset_io(next_io_path, bio);
3040 10 : if (rc == 0) {
3041 10 : return;
3042 : }
3043 :
3044 : complete:
3045 16 : bdev_nvme_reset_io_complete(bio);
3046 26 : }
3047 :
3048 : static void
3049 26 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
3050 : {
3051 26 : struct nvme_bdev_io *bio = cb_arg;
3052 26 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
3053 26 : struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
3054 :
3055 26 : NVME_BDEV_INFOLOG(nbdev, "continue reset_io %p, rc:%d\n", bio, rc);
3056 :
3057 : /* Reset status is initialized as "failed". Set to "success" once we have at least one
3058 : * successfully reset nvme_ctrlr.
3059 : */
3060 26 : if (rc == 0) {
3061 16 : bio->cpl.cdw0 = 0;
3062 16 : }
3063 :
3064 26 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
3065 26 : }
3066 :
3067 : static int
3068 26 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
3069 : {
3070 26 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
3071 26 : struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
3072 26 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
3073 : spdk_msg_fn msg_fn;
3074 : int rc;
3075 :
3076 26 : assert(bio->io_path == NULL);
3077 26 : bio->io_path = io_path;
3078 :
3079 26 : pthread_mutex_lock(&nvme_ctrlr->mutex);
3080 26 : rc = bdev_nvme_reset_ctrlr_unsafe(nvme_ctrlr, &msg_fn);
3081 26 : if (rc == -EBUSY) {
3082 : /*
3083 : * Reset call is queued only if it is from the app framework. This is on purpose so that
3084 : * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
3085 : * upper level. If they are in the middle of a reset, we won't try to schedule another one.
3086 : */
3087 12 : TAILQ_INSERT_TAIL(&nvme_ctrlr->pending_resets, bio, retry_link);
3088 12 : }
3089 26 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
3090 :
3091 26 : if (rc == 0) {
3092 13 : assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
3093 13 : assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
3094 13 : nvme_ctrlr->ctrlr_op_cb_fn = bdev_nvme_reset_io_continue;
3095 13 : nvme_ctrlr->ctrlr_op_cb_arg = bio;
3096 :
3097 13 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
3098 :
3099 13 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p started resetting ctrlr [%s, %u].\n",
3100 : bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr));
3101 26 : } else if (rc == -EBUSY) {
3102 12 : rc = 0;
3103 :
3104 12 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p was queued to ctrlr [%s, %u].\n",
3105 : bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr));
3106 12 : } else {
3107 1 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p could not reset ctrlr [%s, %u], rc:%d\n",
3108 : bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr), rc);
3109 : }
3110 :
3111 26 : return rc;
3112 : }
3113 :
3114 : static void
3115 16 : bdev_nvme_freeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
3116 : {
3117 16 : struct nvme_bdev_io *bio = ctx;
3118 16 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
3119 : struct nvme_bdev_channel *nbdev_ch;
3120 : struct nvme_io_path *io_path;
3121 : int rc;
3122 :
3123 16 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
3124 :
3125 : /* Initialize with failed status. With multipath it is enough to have at least one successful
3126 : * nvme_ctrlr reset. If there is none, reset status will remain failed.
3127 : */
3128 16 : bio->cpl.cdw0 = 1;
3129 :
3130 : /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
3131 16 : io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
3132 16 : assert(io_path != NULL);
3133 :
3134 16 : rc = _bdev_nvme_reset_io(io_path, bio);
3135 16 : if (rc != 0) {
3136 : /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
3137 1 : rc = (rc == -EALREADY) ? 0 : rc;
3138 :
3139 1 : bdev_nvme_reset_io_continue(bio, rc);
3140 1 : }
3141 16 : }
3142 :
3143 : static void
3144 30 : bdev_nvme_freeze_bdev_channel(struct nvme_bdev_channel_iter *i,
3145 : struct nvme_bdev *nbdev,
3146 : struct nvme_bdev_channel *nbdev_ch, void *ctx)
3147 : {
3148 30 : nbdev_ch->resetting = true;
3149 :
3150 30 : nvme_bdev_for_each_channel_continue(i, 0);
3151 30 : }
3152 :
3153 : static void
3154 15 : bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio)
3155 : {
3156 15 : NVME_BDEV_INFOLOG(nbdev, "reset_io %p started.\n", bio);
3157 :
3158 30 : nvme_bdev_for_each_channel(nbdev,
3159 : bdev_nvme_freeze_bdev_channel,
3160 15 : bio,
3161 : bdev_nvme_freeze_bdev_channel_done);
3162 15 : }
3163 :
3164 : static int
3165 32 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
3166 : {
3167 32 : if (nvme_ctrlr->destruct) {
3168 : /* Don't bother resetting if the controller is in the process of being destructed. */
3169 2 : return -ENXIO;
3170 : }
3171 :
3172 30 : if (nvme_ctrlr->resetting) {
3173 3 : if (!nvme_ctrlr->in_failover) {
3174 3 : NVME_CTRLR_NOTICELOG(nvme_ctrlr,
3175 : "Reset is already in progress. Defer failover until reset completes.\n");
3176 :
3177 : /* Defer failover until reset completes. */
3178 3 : nvme_ctrlr->pending_failover = true;
3179 3 : return -EINPROGRESS;
3180 : } else {
3181 0 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform failover, already in progress.\n");
3182 0 : return -EBUSY;
3183 : }
3184 : }
3185 :
3186 27 : bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
3187 :
3188 27 : if (nvme_ctrlr->reconnect_is_delayed) {
3189 1 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Reconnect is already scheduled.\n");
3190 :
3191 : /* We rely on the next reconnect for the failover. */
3192 1 : return -EALREADY;
3193 : }
3194 :
3195 26 : if (nvme_ctrlr->disabled) {
3196 0 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Controller is disabled.\n");
3197 :
3198 : /* We rely on the enablement for the failover. */
3199 0 : return -EALREADY;
3200 : }
3201 :
3202 26 : nvme_ctrlr->resetting = true;
3203 26 : nvme_ctrlr->in_failover = true;
3204 :
3205 26 : assert(nvme_ctrlr->reset_start_tsc == 0);
3206 26 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
3207 :
3208 26 : return 0;
3209 32 : }
3210 :
3211 : static int
3212 30 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
3213 : {
3214 : int rc;
3215 :
3216 30 : pthread_mutex_lock(&nvme_ctrlr->mutex);
3217 30 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
3218 30 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
3219 :
3220 30 : if (rc == 0) {
3221 25 : spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
3222 30 : } else if (rc == -EALREADY) {
3223 0 : rc = 0;
3224 0 : }
3225 :
3226 30 : return rc;
3227 : }
3228 :
3229 : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
3230 : uint64_t num_blocks);
3231 :
3232 : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
3233 : uint64_t num_blocks);
3234 :
3235 : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
3236 : uint64_t src_offset_blocks,
3237 : uint64_t num_blocks);
3238 :
3239 : static void
3240 1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
3241 : bool success)
3242 : {
3243 1 : struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3244 : int ret;
3245 :
3246 1 : if (!success) {
3247 0 : ret = -EINVAL;
3248 0 : goto exit;
3249 : }
3250 :
3251 1 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
3252 0 : ret = -ENXIO;
3253 0 : goto exit;
3254 : }
3255 :
3256 2 : ret = bdev_nvme_readv(bio,
3257 1 : bdev_io->u.bdev.iovs,
3258 1 : bdev_io->u.bdev.iovcnt,
3259 1 : bdev_io->u.bdev.md_buf,
3260 1 : bdev_io->u.bdev.num_blocks,
3261 1 : bdev_io->u.bdev.offset_blocks,
3262 1 : bdev_io->u.bdev.dif_check_flags,
3263 1 : bdev_io->u.bdev.memory_domain,
3264 1 : bdev_io->u.bdev.memory_domain_ctx,
3265 1 : bdev_io->u.bdev.accel_sequence);
3266 :
3267 : exit:
3268 1 : if (spdk_unlikely(ret != 0)) {
3269 0 : bdev_nvme_io_complete(bio, ret);
3270 0 : }
3271 1 : }
3272 :
3273 : static inline void
3274 59 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
3275 : {
3276 59 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3277 59 : struct spdk_bdev *bdev = bdev_io->bdev;
3278 : struct nvme_bdev_io *nbdev_io_to_abort;
3279 59 : int rc = 0;
3280 :
3281 59 : switch (bdev_io->type) {
3282 : case SPDK_BDEV_IO_TYPE_READ:
3283 3 : if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
3284 :
3285 4 : rc = bdev_nvme_readv(nbdev_io,
3286 2 : bdev_io->u.bdev.iovs,
3287 2 : bdev_io->u.bdev.iovcnt,
3288 2 : bdev_io->u.bdev.md_buf,
3289 2 : bdev_io->u.bdev.num_blocks,
3290 2 : bdev_io->u.bdev.offset_blocks,
3291 2 : bdev_io->u.bdev.dif_check_flags,
3292 2 : bdev_io->u.bdev.memory_domain,
3293 2 : bdev_io->u.bdev.memory_domain_ctx,
3294 2 : bdev_io->u.bdev.accel_sequence);
3295 2 : } else {
3296 2 : spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
3297 1 : bdev_io->u.bdev.num_blocks * bdev->blocklen);
3298 1 : rc = 0;
3299 : }
3300 3 : break;
3301 : case SPDK_BDEV_IO_TYPE_WRITE:
3302 50 : rc = bdev_nvme_writev(nbdev_io,
3303 25 : bdev_io->u.bdev.iovs,
3304 25 : bdev_io->u.bdev.iovcnt,
3305 25 : bdev_io->u.bdev.md_buf,
3306 25 : bdev_io->u.bdev.num_blocks,
3307 25 : bdev_io->u.bdev.offset_blocks,
3308 25 : bdev_io->u.bdev.dif_check_flags,
3309 25 : bdev_io->u.bdev.memory_domain,
3310 25 : bdev_io->u.bdev.memory_domain_ctx,
3311 25 : bdev_io->u.bdev.accel_sequence,
3312 25 : bdev_io->u.bdev.nvme_cdw12,
3313 25 : bdev_io->u.bdev.nvme_cdw13);
3314 25 : break;
3315 : case SPDK_BDEV_IO_TYPE_COMPARE:
3316 2 : rc = bdev_nvme_comparev(nbdev_io,
3317 1 : bdev_io->u.bdev.iovs,
3318 1 : bdev_io->u.bdev.iovcnt,
3319 1 : bdev_io->u.bdev.md_buf,
3320 1 : bdev_io->u.bdev.num_blocks,
3321 1 : bdev_io->u.bdev.offset_blocks,
3322 1 : bdev_io->u.bdev.dif_check_flags);
3323 1 : break;
3324 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3325 4 : rc = bdev_nvme_comparev_and_writev(nbdev_io,
3326 2 : bdev_io->u.bdev.iovs,
3327 2 : bdev_io->u.bdev.iovcnt,
3328 2 : bdev_io->u.bdev.fused_iovs,
3329 2 : bdev_io->u.bdev.fused_iovcnt,
3330 2 : bdev_io->u.bdev.md_buf,
3331 2 : bdev_io->u.bdev.num_blocks,
3332 2 : bdev_io->u.bdev.offset_blocks,
3333 2 : bdev_io->u.bdev.dif_check_flags);
3334 2 : break;
3335 : case SPDK_BDEV_IO_TYPE_UNMAP:
3336 2 : rc = bdev_nvme_unmap(nbdev_io,
3337 1 : bdev_io->u.bdev.offset_blocks,
3338 1 : bdev_io->u.bdev.num_blocks);
3339 1 : break;
3340 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3341 0 : rc = bdev_nvme_write_zeroes(nbdev_io,
3342 0 : bdev_io->u.bdev.offset_blocks,
3343 0 : bdev_io->u.bdev.num_blocks);
3344 0 : break;
3345 : case SPDK_BDEV_IO_TYPE_RESET:
3346 15 : nbdev_io->io_path = NULL;
3347 15 : bdev_nvme_reset_io(bdev->ctxt, nbdev_io);
3348 15 : return;
3349 :
3350 : case SPDK_BDEV_IO_TYPE_FLUSH:
3351 1 : bdev_nvme_io_complete(nbdev_io, 0);
3352 1 : return;
3353 :
3354 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3355 0 : rc = bdev_nvme_zone_appendv(nbdev_io,
3356 0 : bdev_io->u.bdev.iovs,
3357 0 : bdev_io->u.bdev.iovcnt,
3358 0 : bdev_io->u.bdev.md_buf,
3359 0 : bdev_io->u.bdev.num_blocks,
3360 0 : bdev_io->u.bdev.offset_blocks,
3361 0 : bdev_io->u.bdev.dif_check_flags);
3362 0 : break;
3363 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3364 0 : rc = bdev_nvme_get_zone_info(nbdev_io,
3365 0 : bdev_io->u.zone_mgmt.zone_id,
3366 0 : bdev_io->u.zone_mgmt.num_zones,
3367 0 : bdev_io->u.zone_mgmt.buf);
3368 0 : break;
3369 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3370 0 : rc = bdev_nvme_zone_management(nbdev_io,
3371 0 : bdev_io->u.zone_mgmt.zone_id,
3372 0 : bdev_io->u.zone_mgmt.zone_action);
3373 0 : break;
3374 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3375 5 : nbdev_io->io_path = NULL;
3376 10 : bdev_nvme_admin_passthru(nbdev_ch,
3377 5 : nbdev_io,
3378 5 : &bdev_io->u.nvme_passthru.cmd,
3379 5 : bdev_io->u.nvme_passthru.buf,
3380 5 : bdev_io->u.nvme_passthru.nbytes);
3381 5 : return;
3382 :
3383 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3384 0 : rc = bdev_nvme_io_passthru(nbdev_io,
3385 0 : &bdev_io->u.nvme_passthru.cmd,
3386 0 : bdev_io->u.nvme_passthru.buf,
3387 0 : bdev_io->u.nvme_passthru.nbytes);
3388 0 : break;
3389 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3390 0 : rc = bdev_nvme_io_passthru_md(nbdev_io,
3391 0 : &bdev_io->u.nvme_passthru.cmd,
3392 0 : bdev_io->u.nvme_passthru.buf,
3393 0 : bdev_io->u.nvme_passthru.nbytes,
3394 0 : bdev_io->u.nvme_passthru.md_buf,
3395 0 : bdev_io->u.nvme_passthru.md_len);
3396 0 : break;
3397 : case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
3398 0 : rc = bdev_nvme_iov_passthru_md(nbdev_io,
3399 0 : &bdev_io->u.nvme_passthru.cmd,
3400 0 : bdev_io->u.nvme_passthru.iovs,
3401 0 : bdev_io->u.nvme_passthru.iovcnt,
3402 0 : bdev_io->u.nvme_passthru.nbytes,
3403 0 : bdev_io->u.nvme_passthru.md_buf,
3404 0 : bdev_io->u.nvme_passthru.md_len);
3405 0 : break;
3406 : case SPDK_BDEV_IO_TYPE_ABORT:
3407 6 : nbdev_io->io_path = NULL;
3408 6 : nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
3409 12 : bdev_nvme_abort(nbdev_ch,
3410 6 : nbdev_io,
3411 6 : nbdev_io_to_abort);
3412 6 : return;
3413 :
3414 : case SPDK_BDEV_IO_TYPE_COPY:
3415 0 : rc = bdev_nvme_copy(nbdev_io,
3416 0 : bdev_io->u.bdev.offset_blocks,
3417 0 : bdev_io->u.bdev.copy.src_offset_blocks,
3418 0 : bdev_io->u.bdev.num_blocks);
3419 0 : break;
3420 : default:
3421 0 : rc = -EINVAL;
3422 0 : break;
3423 : }
3424 :
3425 32 : if (spdk_unlikely(rc != 0)) {
3426 0 : bdev_nvme_io_complete(nbdev_io, rc);
3427 0 : }
3428 59 : }
3429 :
3430 : static void
3431 68 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
3432 : {
3433 68 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
3434 68 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3435 :
3436 68 : if (spdk_likely(nbdev_io->submit_tsc == 0)) {
3437 68 : nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
3438 68 : } else {
3439 : /* There are cases where submit_tsc != 0, i.e. retry I/O.
3440 : * We need to update submit_tsc here.
3441 : */
3442 0 : nbdev_io->submit_tsc = spdk_get_ticks();
3443 : }
3444 :
3445 68 : spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
3446 68 : nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
3447 68 : if (spdk_unlikely(!nbdev_io->io_path)) {
3448 13 : if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
3449 12 : bdev_nvme_io_complete(nbdev_io, -ENXIO);
3450 12 : return;
3451 : }
3452 :
3453 : /* Admin commands do not use the optimal I/O path.
3454 : * Simply fall through even if it is not found.
3455 : */
3456 1 : }
3457 :
3458 56 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
3459 68 : }
3460 :
3461 : static bool
3462 0 : bdev_nvme_is_supported_csi(enum spdk_nvme_csi csi)
3463 : {
3464 0 : switch (csi) {
3465 : case SPDK_NVME_CSI_NVM:
3466 0 : return true;
3467 : case SPDK_NVME_CSI_ZNS:
3468 0 : return true;
3469 : default:
3470 0 : return false;
3471 : }
3472 0 : }
3473 :
3474 : static bool
3475 0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
3476 : {
3477 0 : struct nvme_bdev *nbdev = ctx;
3478 : struct nvme_ns *nvme_ns;
3479 : struct spdk_nvme_ns *ns;
3480 : struct spdk_nvme_ctrlr *ctrlr;
3481 : const struct spdk_nvme_ctrlr_data *cdata;
3482 :
3483 0 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
3484 0 : assert(nvme_ns != NULL);
3485 0 : ns = nvme_ns->ns;
3486 0 : if (ns == NULL) {
3487 0 : return false;
3488 : }
3489 :
3490 0 : if (!bdev_nvme_is_supported_csi(spdk_nvme_ns_get_csi(ns))) {
3491 0 : switch (io_type) {
3492 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3493 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3494 0 : return true;
3495 :
3496 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3497 0 : return spdk_nvme_ns_get_md_size(ns) ? true : false;
3498 :
3499 : default:
3500 0 : return false;
3501 : }
3502 : }
3503 :
3504 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3505 :
3506 0 : switch (io_type) {
3507 : case SPDK_BDEV_IO_TYPE_READ:
3508 : case SPDK_BDEV_IO_TYPE_WRITE:
3509 : case SPDK_BDEV_IO_TYPE_RESET:
3510 : case SPDK_BDEV_IO_TYPE_FLUSH:
3511 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3512 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3513 : case SPDK_BDEV_IO_TYPE_ABORT:
3514 0 : return true;
3515 :
3516 : case SPDK_BDEV_IO_TYPE_COMPARE:
3517 0 : return spdk_nvme_ns_supports_compare(ns);
3518 :
3519 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3520 0 : return spdk_nvme_ns_get_md_size(ns) ? true : false;
3521 :
3522 : case SPDK_BDEV_IO_TYPE_UNMAP:
3523 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3524 0 : return cdata->oncs.dsm;
3525 :
3526 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3527 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3528 0 : return cdata->oncs.write_zeroes;
3529 :
3530 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3531 0 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
3532 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
3533 0 : return true;
3534 : }
3535 0 : return false;
3536 :
3537 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3538 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3539 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
3540 :
3541 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3542 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
3543 0 : spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
3544 :
3545 : case SPDK_BDEV_IO_TYPE_COPY:
3546 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3547 0 : return cdata->oncs.copy;
3548 :
3549 : default:
3550 0 : return false;
3551 : }
3552 0 : }
3553 :
3554 : static int
3555 61 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
3556 : {
3557 : struct nvme_qpair *nvme_qpair;
3558 : struct spdk_io_channel *pg_ch;
3559 : int rc;
3560 :
3561 61 : nvme_qpair = calloc(1, sizeof(*nvme_qpair));
3562 61 : if (!nvme_qpair) {
3563 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to alloc nvme_qpair.\n");
3564 0 : return -1;
3565 : }
3566 :
3567 61 : TAILQ_INIT(&nvme_qpair->io_path_list);
3568 :
3569 61 : nvme_qpair->ctrlr = nvme_ctrlr;
3570 61 : nvme_qpair->ctrlr_ch = ctrlr_ch;
3571 :
3572 61 : pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
3573 61 : if (!pg_ch) {
3574 0 : free(nvme_qpair);
3575 0 : return -1;
3576 : }
3577 :
3578 61 : nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
3579 :
3580 : #ifdef SPDK_CONFIG_VTUNE
3581 : nvme_qpair->group->collect_spin_stat = true;
3582 : #else
3583 61 : nvme_qpair->group->collect_spin_stat = false;
3584 : #endif
3585 :
3586 61 : if (!nvme_ctrlr->disabled) {
3587 : /* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will
3588 : * be created when it's enabled.
3589 : */
3590 61 : rc = bdev_nvme_create_qpair(nvme_qpair);
3591 61 : if (rc != 0) {
3592 : /* nvme_ctrlr can't create IO qpair if connection is down.
3593 : * If reconnect_delay_sec is non-zero, creating IO qpair is retried
3594 : * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
3595 : * submitted IO will be queued until IO qpair is successfully created.
3596 : *
3597 : * Hence, if both are satisfied, ignore the failure.
3598 : */
3599 0 : if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
3600 0 : spdk_put_io_channel(pg_ch);
3601 0 : free(nvme_qpair);
3602 0 : return rc;
3603 : }
3604 0 : }
3605 61 : }
3606 :
3607 61 : TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3608 :
3609 61 : ctrlr_ch->qpair = nvme_qpair;
3610 :
3611 61 : nvme_ctrlr_get_ref(nvme_ctrlr);
3612 :
3613 61 : return 0;
3614 61 : }
3615 :
/*
 * I/O channel create callback for an nvme_ctrlr: io_device is used as the nvme_ctrlr
 * and ctx_buf as its nvme_ctrlr_channel. Returns the result of nvme_qpair_create().
 */
static int
bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
{
	return nvme_qpair_create((struct nvme_ctrlr *)io_device,
				 (struct nvme_ctrlr_channel *)ctx_buf);
}
3624 :
3625 : static void
3626 61 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
3627 : {
3628 : struct nvme_io_path *io_path, *next;
3629 :
3630 61 : assert(nvme_qpair->group != NULL);
3631 :
3632 100 : TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
3633 39 : TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
3634 39 : nvme_io_path_free(io_path);
3635 39 : }
3636 :
3637 61 : TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3638 :
3639 61 : spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
3640 :
3641 61 : nvme_ctrlr_put_ref(nvme_qpair->ctrlr);
3642 :
3643 61 : free(nvme_qpair);
3644 61 : }
3645 :
3646 : static void
3647 61 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3648 : {
3649 61 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3650 : struct nvme_qpair *nvme_qpair;
3651 :
3652 61 : nvme_qpair = ctrlr_ch->qpair;
3653 61 : assert(nvme_qpair != NULL);
3654 :
3655 61 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
3656 :
3657 61 : if (nvme_qpair->qpair != NULL) {
3658 47 : if (ctrlr_ch->reset_iter == NULL) {
3659 47 : spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
3660 47 : } else {
3661 : /* Skip current ctrlr_channel in a full reset sequence because
3662 : * it is being deleted now. The qpair is already being disconnected.
3663 : * We do not have to restart disconnecting it.
3664 : */
3665 0 : nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
3666 : }
3667 :
3668 : /* We cannot release a reference to the poll group now.
3669 : * The qpair may be disconnected asynchronously later.
3670 : * We need to poll it until it is actually disconnected.
3671 : * Just detach the qpair from the deleting ctrlr_channel.
3672 : */
3673 47 : nvme_qpair->ctrlr_ch = NULL;
3674 47 : } else {
3675 14 : assert(ctrlr_ch->reset_iter == NULL);
3676 :
3677 14 : nvme_qpair_delete(nvme_qpair);
3678 : }
3679 61 : }
3680 :
3681 : static inline struct spdk_io_channel *
3682 0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
3683 : {
3684 0 : if (spdk_unlikely(!group->accel_channel)) {
3685 0 : group->accel_channel = spdk_accel_get_io_channel();
3686 0 : if (!group->accel_channel) {
3687 0 : SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
3688 : group);
3689 0 : return NULL;
3690 : }
3691 0 : }
3692 :
3693 0 : return group->accel_channel;
3694 0 : }
3695 :
3696 : static void
3697 0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
3698 : {
3699 0 : spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
3700 0 : }
3701 :
/* Accel function table hook: forwards to spdk_accel_sequence_abort(). */
static void
bdev_nvme_abort_sequence(void *seq)
{
	struct spdk_accel_sequence *accel_seq = seq;

	spdk_accel_sequence_abort(accel_seq);
}
3707 :
/* Accel function table hook: forwards to spdk_accel_sequence_reverse(). */
static void
bdev_nvme_reverse_sequence(void *seq)
{
	struct spdk_accel_sequence *accel_seq = seq;

	spdk_accel_sequence_reverse(accel_seq);
}
3713 :
3714 : static int
3715 0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
3716 : struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
3717 : spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
3718 : {
3719 : struct spdk_io_channel *ch;
3720 0 : struct nvme_poll_group *group = ctx;
3721 :
3722 0 : ch = bdev_nvme_get_accel_channel(group);
3723 0 : if (spdk_unlikely(ch == NULL)) {
3724 0 : return -ENOMEM;
3725 : }
3726 :
3727 0 : return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
3728 0 : domain, domain_ctx, seed, cb_fn, cb_arg);
3729 0 : }
3730 :
3731 : static int
3732 0 : bdev_nvme_append_copy(void *ctx, void **seq, struct iovec *dst_iovs, uint32_t dst_iovcnt,
3733 : struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
3734 : struct iovec *src_iovs, uint32_t src_iovcnt,
3735 : struct spdk_memory_domain *src_domain, void *src_domain_ctx,
3736 : spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
3737 : {
3738 : struct spdk_io_channel *ch;
3739 0 : struct nvme_poll_group *group = ctx;
3740 :
3741 0 : ch = bdev_nvme_get_accel_channel(group);
3742 0 : if (spdk_unlikely(ch == NULL)) {
3743 0 : return -ENOMEM;
3744 : }
3745 :
3746 0 : return spdk_accel_append_copy((struct spdk_accel_sequence **)seq, ch,
3747 0 : dst_iovs, dst_iovcnt, dst_domain, dst_domain_ctx,
3748 0 : src_iovs, src_iovcnt, src_domain, src_domain_ctx,
3749 0 : cb_fn, cb_arg);
3750 0 : }
3751 :
/*
 * Accel hooks passed to spdk_nvme_poll_group_create() for every poll group created by
 * this module. Each entry forwards to the corresponding spdk_accel_* call above.
 */
3752 : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
/* Size of the table structure as compiled into this module. */
3753 : .table_size = sizeof(struct spdk_nvme_accel_fn_table),
3754 : .append_crc32c = bdev_nvme_append_crc32c,
3755 : .append_copy = bdev_nvme_append_copy,
3756 : .finish_sequence = bdev_nvme_finish_sequence,
3757 : .reverse_sequence = bdev_nvme_reverse_sequence,
3758 : .abort_sequence = bdev_nvme_abort_sequence,
3759 : };
3760 :
3761 : static int
3762 0 : bdev_nvme_interrupt_wrapper(void *ctx)
3763 : {
3764 : int num_events;
3765 0 : struct nvme_poll_group *group = ctx;
3766 :
3767 0 : num_events = spdk_nvme_poll_group_wait(group->group, bdev_nvme_disconnected_qpair_cb);
3768 0 : if (spdk_unlikely(num_events < 0)) {
3769 0 : bdev_nvme_check_io_qpairs(group);
3770 0 : }
3771 :
3772 0 : return num_events;
3773 : }
3774 :
3775 : static int
3776 46 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
3777 : {
3778 46 : struct nvme_poll_group *group = ctx_buf;
3779 : uint64_t period;
3780 : int fd;
3781 :
3782 46 : TAILQ_INIT(&group->qpair_list);
3783 :
3784 46 : group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
3785 46 : if (group->group == NULL) {
3786 0 : return -1;
3787 : }
3788 :
3789 46 : period = spdk_interrupt_mode_is_enabled() ? 0 : g_opts.nvme_ioq_poll_period_us;
3790 46 : group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, period);
3791 :
3792 46 : if (group->poller == NULL) {
3793 0 : spdk_nvme_poll_group_destroy(group->group);
3794 0 : return -1;
3795 : }
3796 :
3797 46 : if (spdk_interrupt_mode_is_enabled()) {
3798 0 : spdk_poller_register_interrupt(group->poller, NULL, NULL);
3799 :
3800 0 : fd = spdk_nvme_poll_group_get_fd(group->group);
3801 0 : if (fd < 0) {
3802 0 : spdk_nvme_poll_group_destroy(group->group);
3803 0 : return -1;
3804 : }
3805 :
3806 0 : group->intr = SPDK_INTERRUPT_REGISTER(fd, bdev_nvme_interrupt_wrapper, group);
3807 0 : if (!group->intr) {
3808 0 : spdk_nvme_poll_group_destroy(group->group);
3809 0 : return -1;
3810 : }
3811 0 : }
3812 :
3813 46 : return 0;
3814 46 : }
3815 :
3816 : static void
3817 46 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
3818 : {
3819 46 : struct nvme_poll_group *group = ctx_buf;
3820 :
3821 46 : assert(TAILQ_EMPTY(&group->qpair_list));
3822 :
3823 46 : if (group->accel_channel) {
3824 0 : spdk_put_io_channel(group->accel_channel);
3825 0 : }
3826 :
3827 46 : if (spdk_interrupt_mode_is_enabled()) {
3828 0 : spdk_interrupt_unregister(&group->intr);
3829 0 : }
3830 :
3831 46 : spdk_poller_unregister(&group->poller);
3832 46 : if (spdk_nvme_poll_group_destroy(group->group)) {
3833 0 : SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
3834 0 : assert(false);
3835 : }
3836 46 : }
3837 :
/* Returns an I/O channel for the nvme_bdev passed as ctx, which serves as the io_device. */
static struct spdk_io_channel *
bdev_nvme_get_io_channel(void *ctx)
{
	struct nvme_bdev *nbdev = ctx;

	return spdk_get_io_channel(nbdev);
}
3845 :
3846 : static void *
3847 0 : bdev_nvme_get_module_ctx(void *ctx)
3848 : {
3849 0 : struct nvme_bdev *nvme_bdev = ctx;
3850 : struct nvme_ns *nvme_ns;
3851 :
3852 0 : if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
3853 0 : return NULL;
3854 : }
3855 :
3856 0 : nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
3857 0 : if (!nvme_ns) {
3858 0 : return NULL;
3859 : }
3860 :
3861 0 : return nvme_ns->ns;
3862 0 : }
3863 :
3864 : static const char *
3865 0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
3866 : {
3867 0 : switch (ana_state) {
3868 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
3869 0 : return "optimized";
3870 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
3871 0 : return "non_optimized";
3872 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3873 0 : return "inaccessible";
3874 : case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3875 0 : return "persistent_loss";
3876 : case SPDK_NVME_ANA_CHANGE_STATE:
3877 0 : return "change";
3878 : default:
3879 0 : return NULL;
3880 : }
3881 0 : }
3882 :
3883 : static int
3884 8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
3885 : {
3886 8 : struct spdk_memory_domain **_domains = NULL;
3887 8 : struct nvme_bdev *nbdev = ctx;
3888 : struct nvme_ns *nvme_ns;
3889 8 : int i = 0, _array_size = array_size;
3890 8 : int rc = 0;
3891 :
3892 22 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
3893 14 : if (domains && array_size >= i) {
3894 11 : _domains = &domains[i];
3895 11 : } else {
3896 3 : _domains = NULL;
3897 : }
3898 14 : rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
3899 14 : if (rc > 0) {
3900 13 : i += rc;
3901 13 : if (_array_size >= rc) {
3902 9 : _array_size -= rc;
3903 9 : } else {
3904 4 : _array_size = 0;
3905 : }
3906 14 : } else if (rc < 0) {
3907 0 : return rc;
3908 : }
3909 14 : }
3910 :
3911 8 : return i;
3912 8 : }
3913 :
3914 : static const char *
3915 0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
3916 : {
3917 0 : if (nvme_ctrlr->destruct) {
3918 0 : return "deleting";
3919 0 : } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
3920 0 : return "failed";
3921 0 : } else if (nvme_ctrlr->resetting) {
3922 0 : return "resetting";
3923 0 : } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
3924 0 : return "reconnect_is_delayed";
3925 0 : } else if (nvme_ctrlr->disabled) {
3926 0 : return "disabled";
3927 : } else {
3928 0 : return "enabled";
3929 : }
3930 0 : }
3931 :
3932 : void
3933 0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
3934 : {
3935 : struct spdk_nvme_transport_id *trid;
3936 : const struct spdk_nvme_ctrlr_opts *opts;
3937 : const struct spdk_nvme_ctrlr_data *cdata;
3938 : struct nvme_path_id *path_id;
3939 : int32_t numa_id;
3940 :
3941 0 : spdk_json_write_object_begin(w);
3942 :
3943 0 : spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
3944 :
3945 : #ifdef SPDK_CONFIG_NVME_CUSE
3946 : size_t cuse_name_size = 128;
3947 : char cuse_name[cuse_name_size];
3948 :
3949 : int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
3950 : if (rc == 0) {
3951 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
3952 : }
3953 : #endif
3954 0 : trid = &nvme_ctrlr->active_path_id->trid;
3955 0 : spdk_json_write_named_object_begin(w, "trid");
3956 0 : nvme_bdev_dump_trid_json(trid, w);
3957 0 : spdk_json_write_object_end(w);
3958 :
3959 0 : path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
3960 0 : if (path_id != NULL) {
3961 0 : spdk_json_write_named_array_begin(w, "alternate_trids");
3962 0 : do {
3963 0 : trid = &path_id->trid;
3964 0 : spdk_json_write_object_begin(w);
3965 0 : nvme_bdev_dump_trid_json(trid, w);
3966 0 : spdk_json_write_object_end(w);
3967 :
3968 0 : path_id = TAILQ_NEXT(path_id, link);
3969 0 : } while (path_id != NULL);
3970 0 : spdk_json_write_array_end(w);
3971 0 : }
3972 :
3973 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
3974 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
3975 :
3976 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
3977 0 : spdk_json_write_named_object_begin(w, "host");
3978 0 : spdk_json_write_named_string(w, "nqn", opts->hostnqn);
3979 0 : spdk_json_write_named_string(w, "addr", opts->src_addr);
3980 0 : spdk_json_write_named_string(w, "svcid", opts->src_svcid);
3981 0 : spdk_json_write_object_end(w);
3982 :
3983 0 : numa_id = spdk_nvme_ctrlr_get_numa_id(nvme_ctrlr->ctrlr);
3984 0 : if (numa_id != SPDK_ENV_NUMA_ID_ANY) {
3985 0 : spdk_json_write_named_uint32(w, "numa_id", numa_id);
3986 0 : }
3987 0 : spdk_json_write_object_end(w);
3988 0 : }
3989 :
3990 : static void
3991 0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
3992 : struct nvme_ns *nvme_ns)
3993 : {
3994 : struct spdk_nvme_ns *ns;
3995 : struct spdk_nvme_ctrlr *ctrlr;
3996 : const struct spdk_nvme_ctrlr_data *cdata;
3997 : const struct spdk_nvme_transport_id *trid;
3998 : union spdk_nvme_vs_register vs;
3999 : const struct spdk_nvme_ns_data *nsdata;
4000 : char buf[128];
4001 :
4002 0 : ns = nvme_ns->ns;
4003 0 : if (ns == NULL) {
4004 0 : return;
4005 : }
4006 :
4007 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
4008 :
4009 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4010 0 : trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
4011 0 : vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
4012 :
4013 0 : spdk_json_write_object_begin(w);
4014 :
4015 0 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
4016 0 : spdk_json_write_named_string(w, "pci_address", trid->traddr);
4017 0 : }
4018 :
4019 0 : spdk_json_write_named_object_begin(w, "trid");
4020 :
4021 0 : nvme_bdev_dump_trid_json(trid, w);
4022 :
4023 0 : spdk_json_write_object_end(w);
4024 :
4025 : #ifdef SPDK_CONFIG_NVME_CUSE
4026 : size_t cuse_name_size = 128;
4027 : char cuse_name[cuse_name_size];
4028 :
4029 : int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
4030 : cuse_name, &cuse_name_size);
4031 : if (rc == 0) {
4032 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
4033 : }
4034 : #endif
4035 :
4036 0 : spdk_json_write_named_object_begin(w, "ctrlr_data");
4037 :
4038 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
4039 :
4040 0 : spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
4041 :
4042 0 : snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
4043 0 : spdk_str_trim(buf);
4044 0 : spdk_json_write_named_string(w, "model_number", buf);
4045 :
4046 0 : snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
4047 0 : spdk_str_trim(buf);
4048 0 : spdk_json_write_named_string(w, "serial_number", buf);
4049 :
4050 0 : snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
4051 0 : spdk_str_trim(buf);
4052 0 : spdk_json_write_named_string(w, "firmware_revision", buf);
4053 :
4054 0 : if (cdata->subnqn[0] != '\0') {
4055 0 : spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
4056 0 : }
4057 :
4058 0 : spdk_json_write_named_object_begin(w, "oacs");
4059 :
4060 0 : spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
4061 0 : spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
4062 0 : spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
4063 0 : spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
4064 :
4065 0 : spdk_json_write_object_end(w);
4066 :
4067 0 : spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
4068 0 : spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
4069 :
4070 0 : spdk_json_write_object_end(w);
4071 :
4072 0 : spdk_json_write_named_object_begin(w, "vs");
4073 :
4074 0 : spdk_json_write_name(w, "nvme_version");
4075 0 : if (vs.bits.ter) {
4076 0 : spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
4077 0 : } else {
4078 0 : spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
4079 : }
4080 :
4081 0 : spdk_json_write_object_end(w);
4082 :
4083 0 : nsdata = spdk_nvme_ns_get_data(ns);
4084 :
4085 0 : spdk_json_write_named_object_begin(w, "ns_data");
4086 :
4087 0 : spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
4088 :
4089 0 : if (cdata->cmic.ana_reporting) {
4090 0 : spdk_json_write_named_string(w, "ana_state",
4091 0 : _nvme_ana_state_str(nvme_ns->ana_state));
4092 0 : }
4093 :
4094 0 : spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
4095 :
4096 0 : spdk_json_write_object_end(w);
4097 :
4098 0 : if (cdata->oacs.security) {
4099 0 : spdk_json_write_named_object_begin(w, "security");
4100 :
4101 0 : spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
4102 :
4103 0 : spdk_json_write_object_end(w);
4104 0 : }
4105 :
4106 0 : spdk_json_write_object_end(w);
4107 0 : }
4108 :
4109 : static const char *
4110 0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
4111 : {
4112 0 : switch (nbdev->mp_policy) {
4113 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
4114 0 : return "active_passive";
4115 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
4116 0 : return "active_active";
4117 : default:
4118 0 : assert(false);
4119 : return "invalid";
4120 : }
4121 0 : }
4122 :
4123 : static const char *
4124 0 : nvme_bdev_get_mp_selector_str(struct nvme_bdev *nbdev)
4125 : {
4126 0 : switch (nbdev->mp_selector) {
4127 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
4128 0 : return "round_robin";
4129 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
4130 0 : return "queue_depth";
4131 : default:
4132 0 : assert(false);
4133 : return "invalid";
4134 : }
4135 0 : }
4136 :
4137 : static int
4138 0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
4139 : {
4140 0 : struct nvme_bdev *nvme_bdev = ctx;
4141 : struct nvme_ns *nvme_ns;
4142 :
4143 0 : pthread_mutex_lock(&nvme_bdev->mutex);
4144 0 : spdk_json_write_named_array_begin(w, "nvme");
4145 0 : TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
4146 0 : nvme_namespace_info_json(w, nvme_ns);
4147 0 : }
4148 0 : spdk_json_write_array_end(w);
4149 0 : spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
4150 0 : if (nvme_bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
4151 0 : spdk_json_write_named_string(w, "selector", nvme_bdev_get_mp_selector_str(nvme_bdev));
4152 0 : if (nvme_bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
4153 0 : spdk_json_write_named_uint32(w, "rr_min_io", nvme_bdev->rr_min_io);
4154 0 : }
4155 0 : }
4156 0 : pthread_mutex_unlock(&nvme_bdev->mutex);
4157 :
4158 0 : return 0;
4159 : }
4160 :
/*
 * write_config_json entry of the bdev function table.
 *
 * Intentionally writes nothing: both arguments are ignored.
 */
static void
bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* No config per bdev needed */
}
4166 :
4167 : static uint64_t
4168 0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
4169 : {
4170 0 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
4171 : struct nvme_io_path *io_path;
4172 : struct nvme_poll_group *group;
4173 0 : uint64_t spin_time = 0;
4174 :
4175 0 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
4176 0 : group = io_path->qpair->group;
4177 :
4178 0 : if (!group || !group->collect_spin_stat) {
4179 0 : continue;
4180 : }
4181 :
4182 0 : if (group->end_ticks != 0) {
4183 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
4184 0 : group->end_ticks = 0;
4185 0 : }
4186 :
4187 0 : spin_time += group->spin_ticks;
4188 0 : group->start_ticks = 0;
4189 0 : group->spin_ticks = 0;
4190 0 : }
4191 :
4192 0 : return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
4193 : }
4194 :
4195 : static void
4196 0 : bdev_nvme_reset_device_stat(void *ctx)
4197 : {
4198 0 : struct nvme_bdev *nbdev = ctx;
4199 :
4200 0 : if (nbdev->err_stat != NULL) {
4201 0 : memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
4202 0 : }
4203 0 : }
4204 :
/*
 * Turn an NVMe status description into a JSON-friendly key: lowercase and
 * underscore-delimited.
 *
 * " - ", "-" and " " are each replaced by "_" (in that order) and the result is
 * lowercased in place. dst is written with a capacity of 256 bytes, so the
 * caller has to provide at least that much room.
 */
static void
bdev_nvme_format_nvme_status(char *dst, const char *src)
{
	char scratch[256];
	const size_t cap = sizeof(scratch);

	spdk_strcpy_replace(dst, cap, src, " - ", "_");
	spdk_strcpy_replace(scratch, cap, dst, "-", "_");
	spdk_strcpy_replace(dst, cap, scratch, " ", "_");
	spdk_strlwr(dst);
}
4216 :
4217 : static void
4218 0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
4219 : {
4220 0 : struct nvme_bdev *nbdev = ctx;
4221 0 : struct spdk_nvme_status status = {};
4222 : uint16_t sct, sc;
4223 : char status_json[256];
4224 : const char *status_str;
4225 :
4226 0 : if (nbdev->err_stat == NULL) {
4227 0 : return;
4228 : }
4229 :
4230 0 : spdk_json_write_named_object_begin(w, "nvme_error");
4231 :
4232 0 : spdk_json_write_named_object_begin(w, "status_type");
4233 0 : for (sct = 0; sct < 8; sct++) {
4234 0 : if (nbdev->err_stat->status_type[sct] == 0) {
4235 0 : continue;
4236 : }
4237 0 : status.sct = sct;
4238 :
4239 0 : status_str = spdk_nvme_cpl_get_status_type_string(&status);
4240 0 : assert(status_str != NULL);
4241 0 : bdev_nvme_format_nvme_status(status_json, status_str);
4242 :
4243 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
4244 0 : }
4245 0 : spdk_json_write_object_end(w);
4246 :
4247 0 : spdk_json_write_named_object_begin(w, "status_code");
4248 0 : for (sct = 0; sct < 4; sct++) {
4249 0 : status.sct = sct;
4250 0 : for (sc = 0; sc < 256; sc++) {
4251 0 : if (nbdev->err_stat->status[sct][sc] == 0) {
4252 0 : continue;
4253 : }
4254 0 : status.sc = sc;
4255 :
4256 0 : status_str = spdk_nvme_cpl_get_status_string(&status);
4257 0 : assert(status_str != NULL);
4258 0 : bdev_nvme_format_nvme_status(status_json, status_str);
4259 :
4260 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
4261 0 : }
4262 0 : }
4263 0 : spdk_json_write_object_end(w);
4264 :
4265 0 : spdk_json_write_object_end(w);
4266 0 : }
4267 :
4268 : static bool
4269 0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
4270 : {
4271 0 : struct nvme_bdev *nbdev = ctx;
4272 : struct nvme_ns *nvme_ns;
4273 : struct spdk_nvme_ctrlr *ctrlr;
4274 :
4275 0 : if (!g_opts.allow_accel_sequence) {
4276 0 : return false;
4277 : }
4278 :
4279 0 : switch (type) {
4280 : case SPDK_BDEV_IO_TYPE_WRITE:
4281 : case SPDK_BDEV_IO_TYPE_READ:
4282 0 : break;
4283 : default:
4284 0 : return false;
4285 : }
4286 :
4287 0 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
4288 0 : assert(nvme_ns != NULL);
4289 :
4290 0 : ctrlr = nvme_ns->ctrlr->ctrlr;
4291 0 : assert(ctrlr != NULL);
4292 :
4293 0 : return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
4294 0 : }
4295 :
4296 : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
4297 : .destruct = bdev_nvme_destruct,
4298 : .submit_request = bdev_nvme_submit_request,
4299 : .io_type_supported = bdev_nvme_io_type_supported,
4300 : .get_io_channel = bdev_nvme_get_io_channel,
4301 : .dump_info_json = bdev_nvme_dump_info_json,
4302 : .write_config_json = bdev_nvme_write_config_json,
4303 : .get_spin_time = bdev_nvme_get_spin_time,
4304 : .get_module_ctx = bdev_nvme_get_module_ctx,
4305 : .get_memory_domains = bdev_nvme_get_memory_domains,
4306 : .accel_sequence_supported = bdev_nvme_accel_sequence_supported,
4307 : .reset_device_stat = bdev_nvme_reset_device_stat,
4308 : .dump_device_stat_json = bdev_nvme_dump_device_stat_json,
4309 : };
4310 :
4311 : typedef int (*bdev_nvme_parse_ana_log_page_cb)(
4312 : const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);
4313 :
4314 : static int
4315 42 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
4316 : bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
4317 : {
4318 : struct spdk_nvme_ana_group_descriptor *copied_desc;
4319 : uint8_t *orig_desc;
4320 : uint32_t i, desc_size, copy_len;
4321 42 : int rc = 0;
4322 :
4323 42 : if (nvme_ctrlr->ana_log_page == NULL) {
4324 0 : return -EINVAL;
4325 : }
4326 :
4327 42 : copied_desc = nvme_ctrlr->copied_ana_desc;
4328 :
4329 42 : orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
4330 42 : copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
4331 :
4332 72 : for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
4333 67 : memcpy(copied_desc, orig_desc, copy_len);
4334 :
4335 67 : rc = cb_fn(copied_desc, cb_arg);
4336 67 : if (rc != 0) {
4337 37 : break;
4338 : }
4339 :
4340 30 : desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
4341 30 : copied_desc->num_of_nsid * sizeof(uint32_t);
4342 30 : orig_desc += desc_size;
4343 30 : copy_len -= desc_size;
4344 30 : }
4345 :
4346 42 : return rc;
4347 42 : }
4348 :
4349 : static int
4350 5 : nvme_ns_ana_transition_timedout(void *ctx)
4351 : {
4352 5 : struct nvme_ns *nvme_ns = ctx;
4353 :
4354 5 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4355 5 : nvme_ns->ana_transition_timedout = true;
4356 :
4357 5 : return SPDK_POLLER_BUSY;
4358 : }
4359 :
4360 : static void
4361 46 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
4362 : const struct spdk_nvme_ana_group_descriptor *desc)
4363 : {
4364 : const struct spdk_nvme_ctrlr_data *cdata;
4365 :
4366 46 : nvme_ns->ana_group_id = desc->ana_group_id;
4367 46 : nvme_ns->ana_state = desc->ana_state;
4368 46 : nvme_ns->ana_state_updating = false;
4369 :
4370 46 : switch (nvme_ns->ana_state) {
4371 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
4372 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
4373 39 : nvme_ns->ana_transition_timedout = false;
4374 39 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4375 39 : break;
4376 :
4377 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
4378 : case SPDK_NVME_ANA_CHANGE_STATE:
4379 6 : if (nvme_ns->anatt_timer != NULL) {
4380 1 : break;
4381 : }
4382 :
4383 5 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
4384 5 : nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
4385 : nvme_ns,
4386 : cdata->anatt * SPDK_SEC_TO_USEC);
4387 5 : break;
4388 : default:
4389 1 : break;
4390 : }
4391 46 : }
4392 :
4393 : static int
4394 60 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
4395 : {
4396 60 : struct nvme_ns *nvme_ns = cb_arg;
4397 : uint32_t i;
4398 :
4399 60 : assert(nvme_ns->ns != NULL);
4400 :
4401 82 : for (i = 0; i < desc->num_of_nsid; i++) {
4402 59 : if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
4403 22 : continue;
4404 : }
4405 :
4406 37 : _nvme_ns_set_ana_state(nvme_ns, desc);
4407 37 : return 1;
4408 : }
4409 :
4410 23 : return 0;
4411 60 : }
4412 :
4413 : static int
4414 5 : nvme_generate_uuid(const char *sn, uint32_t nsid, struct spdk_uuid *uuid)
4415 : {
4416 5 : int rc = 0;
4417 : struct spdk_uuid new_uuid, namespace_uuid;
4418 5 : char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
4419 : /* This namespace UUID was generated using uuid_generate() method. */
4420 5 : const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
4421 : int size;
4422 :
4423 5 : assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
4424 :
4425 5 : spdk_uuid_set_null(&new_uuid);
4426 5 : spdk_uuid_set_null(&namespace_uuid);
4427 :
4428 5 : size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
4429 5 : if (size <= 0 || (unsigned long)size >= sizeof(merged_str)) {
4430 0 : return -EINVAL;
4431 : }
4432 :
4433 5 : spdk_uuid_parse(&namespace_uuid, namespace_str);
4434 :
4435 5 : rc = spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
4436 5 : if (rc == 0) {
4437 5 : memcpy(uuid, &new_uuid, sizeof(struct spdk_uuid));
4438 5 : }
4439 :
4440 5 : return rc;
4441 5 : }
4442 :
4443 : static int
4444 39 : nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
4445 : struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
4446 : struct spdk_bdev_nvme_ctrlr_opts *bdev_opts, void *ctx)
4447 : {
4448 : const struct spdk_uuid *uuid;
4449 : const uint8_t *nguid;
4450 : const struct spdk_nvme_ctrlr_data *cdata;
4451 : const struct spdk_nvme_ns_data *nsdata;
4452 : const struct spdk_nvme_ctrlr_opts *opts;
4453 : enum spdk_nvme_csi csi;
4454 : uint32_t atomic_bs, phys_bs, bs;
4455 39 : char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
4456 : int rc;
4457 :
4458 39 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4459 39 : csi = spdk_nvme_ns_get_csi(ns);
4460 39 : opts = spdk_nvme_ctrlr_get_opts(ctrlr);
4461 :
4462 39 : switch (csi) {
4463 : case SPDK_NVME_CSI_NVM:
4464 39 : disk->product_name = "NVMe disk";
4465 39 : break;
4466 : case SPDK_NVME_CSI_ZNS:
4467 0 : disk->product_name = "NVMe ZNS disk";
4468 0 : disk->zoned = true;
4469 0 : disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
4470 0 : disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
4471 0 : spdk_nvme_ns_get_extended_sector_size(ns);
4472 0 : disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
4473 0 : disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
4474 0 : break;
4475 : default:
4476 0 : if (bdev_opts->allow_unrecognized_csi) {
4477 0 : disk->product_name = "NVMe Passthrough disk";
4478 0 : break;
4479 : }
4480 0 : SPDK_ERRLOG("unsupported CSI: %u\n", csi);
4481 0 : return -ENOTSUP;
4482 : }
4483 :
4484 39 : nguid = spdk_nvme_ns_get_nguid(ns);
4485 39 : if (!nguid) {
4486 39 : uuid = spdk_nvme_ns_get_uuid(ns);
4487 39 : if (uuid) {
4488 12 : disk->uuid = *uuid;
4489 39 : } else if (g_opts.generate_uuids) {
4490 0 : spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
4491 0 : rc = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns), &disk->uuid);
4492 0 : if (rc < 0) {
4493 0 : SPDK_ERRLOG("UUID generation failed (%s)\n", spdk_strerror(-rc));
4494 0 : return rc;
4495 : }
4496 0 : }
4497 39 : } else {
4498 0 : memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
4499 : }
4500 :
4501 39 : disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
4502 39 : if (!disk->name) {
4503 0 : return -ENOMEM;
4504 : }
4505 :
4506 39 : disk->write_cache = 0;
4507 39 : if (cdata->vwc.present) {
4508 : /* Enable if the Volatile Write Cache exists */
4509 0 : disk->write_cache = 1;
4510 0 : }
4511 39 : if (cdata->oncs.write_zeroes) {
4512 0 : disk->max_write_zeroes = UINT16_MAX + 1;
4513 0 : }
4514 39 : disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
4515 39 : disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
4516 39 : disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
4517 39 : disk->ctratt.raw = cdata->ctratt.raw;
4518 39 : disk->nsid = spdk_nvme_ns_get_id(ns);
4519 : /* NVMe driver will split one request into multiple requests
4520 : * based on MDTS and stripe boundary, the bdev layer will use
4521 : * max_segment_size and max_num_segments to split one big IO
4522 : * into multiple requests, then small request can't run out
4523 : * of NVMe internal requests data structure.
4524 : */
4525 39 : if (opts && opts->io_queue_requests) {
4526 0 : disk->max_num_segments = opts->io_queue_requests / 2;
4527 0 : }
4528 39 : if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
4529 : /* The nvme driver will try to split I/O that have too many
4530 : * SGEs, but it doesn't work if that last SGE doesn't end on
4531 : * an aggregate total that is block aligned. The bdev layer has
4532 : * a more robust splitting framework, so use that instead for
4533 : * this case. (See issue #3269.)
4534 : */
4535 0 : uint16_t max_sges = spdk_nvme_ctrlr_get_max_sges(ctrlr);
4536 :
4537 0 : if (disk->max_num_segments == 0) {
4538 0 : disk->max_num_segments = max_sges;
4539 0 : } else {
4540 0 : disk->max_num_segments = spdk_min(disk->max_num_segments, max_sges);
4541 : }
4542 0 : }
4543 39 : disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
4544 :
4545 39 : nsdata = spdk_nvme_ns_get_data(ns);
4546 39 : bs = spdk_nvme_ns_get_sector_size(ns);
4547 39 : atomic_bs = bs;
4548 39 : phys_bs = bs;
4549 39 : if (nsdata->nabo == 0) {
4550 39 : if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
4551 0 : atomic_bs = bs * (1 + nsdata->nawupf);
4552 0 : } else {
4553 39 : atomic_bs = bs * (1 + cdata->awupf);
4554 : }
4555 39 : }
4556 39 : if (nsdata->nsfeat.optperf) {
4557 0 : phys_bs = bs * (1 + nsdata->npwg);
4558 0 : }
4559 39 : disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
4560 :
4561 39 : disk->md_len = spdk_nvme_ns_get_md_size(ns);
4562 39 : if (disk->md_len != 0) {
4563 0 : disk->md_interleave = nsdata->flbas.extended;
4564 0 : disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
4565 0 : if (disk->dif_type != SPDK_DIF_DISABLE) {
4566 0 : disk->dif_is_head_of_md = nsdata->dps.md_start;
4567 0 : disk->dif_check_flags = bdev_opts->prchk_flags;
4568 0 : disk->dif_pi_format = (enum spdk_dif_pi_format)spdk_nvme_ns_get_pi_format(ns);
4569 0 : }
4570 0 : }
4571 :
4572 39 : if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
4573 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
4574 39 : disk->acwu = 0;
4575 39 : } else if (nsdata->nsfeat.ns_atomic_write_unit) {
4576 0 : disk->acwu = nsdata->nacwu + 1; /* 0-based */
4577 0 : } else {
4578 0 : disk->acwu = cdata->acwu + 1; /* 0-based */
4579 : }
4580 :
4581 39 : if (cdata->oncs.copy) {
4582 : /* For now bdev interface allows only single segment copy */
4583 0 : disk->max_copy = nsdata->mssrl;
4584 0 : }
4585 :
4586 39 : disk->ctxt = ctx;
4587 39 : disk->fn_table = &nvmelib_fn_table;
4588 39 : disk->module = &nvme_if;
4589 :
4590 39 : disk->numa.id_valid = 1;
4591 39 : disk->numa.id = spdk_nvme_ctrlr_get_numa_id(ctrlr);
4592 :
4593 39 : return 0;
4594 39 : }
4595 :
4596 : static struct nvme_bdev *
4597 39 : nvme_bdev_alloc(void)
4598 : {
4599 : struct nvme_bdev *bdev;
4600 : int rc;
4601 :
4602 39 : bdev = calloc(1, sizeof(*bdev));
4603 39 : if (!bdev) {
4604 0 : SPDK_ERRLOG("bdev calloc() failed\n");
4605 0 : return NULL;
4606 : }
4607 :
4608 39 : if (g_opts.nvme_error_stat) {
4609 0 : bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
4610 0 : if (!bdev->err_stat) {
4611 0 : SPDK_ERRLOG("err_stat calloc() failed\n");
4612 0 : free(bdev);
4613 0 : return NULL;
4614 : }
4615 0 : }
4616 :
4617 39 : rc = pthread_mutex_init(&bdev->mutex, NULL);
4618 39 : if (rc != 0) {
4619 0 : free(bdev->err_stat);
4620 0 : free(bdev);
4621 0 : return NULL;
4622 : }
4623 :
4624 39 : bdev->ref = 1;
4625 39 : bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
4626 39 : bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
4627 39 : bdev->rr_min_io = UINT32_MAX;
4628 39 : TAILQ_INIT(&bdev->nvme_ns_list);
4629 :
4630 39 : return bdev;
4631 39 : }
4632 :
4633 : static int
4634 39 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4635 : {
4636 : struct nvme_bdev *bdev;
4637 39 : struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
4638 : int rc;
4639 :
4640 39 : bdev = nvme_bdev_alloc();
4641 39 : if (bdev == NULL) {
4642 0 : SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
4643 0 : return -ENOMEM;
4644 : }
4645 :
4646 39 : bdev->opal = nvme_ctrlr->opal_dev != NULL;
4647 :
4648 78 : rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
4649 39 : nvme_ns->ns, &nvme_ctrlr->opts, bdev);
4650 39 : if (rc != 0) {
4651 0 : SPDK_ERRLOG("Failed to create NVMe disk\n");
4652 0 : nvme_bdev_free(bdev);
4653 0 : return rc;
4654 : }
4655 :
4656 78 : spdk_io_device_register(bdev,
4657 : bdev_nvme_create_bdev_channel_cb,
4658 : bdev_nvme_destroy_bdev_channel_cb,
4659 : sizeof(struct nvme_bdev_channel),
4660 39 : bdev->disk.name);
4661 :
4662 39 : nvme_ns->bdev = bdev;
4663 39 : bdev->nsid = nvme_ns->id;
4664 39 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4665 :
4666 39 : bdev->nbdev_ctrlr = nbdev_ctrlr;
4667 39 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);
4668 :
4669 39 : rc = spdk_bdev_register(&bdev->disk);
4670 39 : if (rc != 0) {
4671 1 : SPDK_ERRLOG("spdk_bdev_register() failed\n");
4672 1 : spdk_io_device_unregister(bdev, NULL);
4673 1 : nvme_ns->bdev = NULL;
4674 1 : TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
4675 1 : nvme_bdev_free(bdev);
4676 1 : return rc;
4677 : }
4678 :
4679 38 : return 0;
4680 39 : }
4681 :
4682 : static bool
4683 23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
4684 : {
4685 : const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
4686 : const struct spdk_uuid *uuid1, *uuid2;
4687 :
4688 23 : nsdata1 = spdk_nvme_ns_get_data(ns1);
4689 23 : nsdata2 = spdk_nvme_ns_get_data(ns2);
4690 23 : uuid1 = spdk_nvme_ns_get_uuid(ns1);
4691 23 : uuid2 = spdk_nvme_ns_get_uuid(ns2);
4692 :
4693 71 : return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
4694 22 : nsdata1->eui64 == nsdata2->eui64 &&
4695 21 : ((uuid1 == NULL && uuid2 == NULL) ||
4696 29 : (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
4697 18 : spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
4698 : }
4699 :
4700 : static bool
4701 0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
4702 : struct spdk_nvme_ctrlr_opts *opts)
4703 : {
4704 : struct nvme_probe_skip_entry *entry;
4705 :
4706 0 : TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
4707 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
4708 0 : return false;
4709 : }
4710 0 : }
4711 :
4712 0 : opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
4713 0 : opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
4714 0 : opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
4715 0 : opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
4716 0 : opts->disable_read_ana_log_page = true;
4717 :
4718 0 : SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
4719 :
4720 0 : return true;
4721 0 : }
4722 :
4723 : static void
4724 0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
4725 : {
4726 0 : struct nvme_ctrlr *nvme_ctrlr = ctx;
4727 :
4728 0 : if (spdk_nvme_cpl_is_error(cpl)) {
4729 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "Abort failed. Resetting controller. sc is %u, sct is %u.\n",
4730 : cpl->status.sc, cpl->status.sct);
4731 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4732 0 : } else if (cpl->cdw0 & 0x1) {
4733 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "Specified command could not be aborted.\n");
4734 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4735 0 : }
4736 0 : }
4737 :
4738 : static void
4739 0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
4740 : struct spdk_nvme_qpair *qpair, uint16_t cid)
4741 : {
4742 0 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
4743 : union spdk_nvme_csts_register csts;
4744 : int rc;
4745 :
4746 0 : assert(nvme_ctrlr->ctrlr == ctrlr);
4747 :
4748 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n",
4749 : ctrlr, qpair, cid);
4750 :
4751 : /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
4752 : * queue. (Note: qpair == NULL when there's an admin cmd timeout.) Otherwise we
4753 : * would submit another fabrics cmd on the admin queue to read CSTS and check for its
4754 : * completion recursively.
4755 : */
4756 0 : if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
4757 0 : csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
4758 0 : if (csts.bits.cfs) {
4759 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Controller Fatal Status, reset required\n");
4760 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4761 0 : return;
4762 : }
4763 0 : }
4764 :
4765 0 : switch (g_opts.action_on_timeout) {
4766 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
4767 0 : if (qpair) {
4768 : /* Don't send abort to ctrlr when ctrlr is not available. */
4769 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4770 0 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
4771 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4772 0 : NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Quit abort. Ctrlr is not available.\n");
4773 0 : return;
4774 : }
4775 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4776 :
4777 0 : rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
4778 0 : nvme_abort_cpl, nvme_ctrlr);
4779 0 : if (rc == 0) {
4780 0 : return;
4781 : }
4782 :
4783 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to send abort. Resetting, rc is %d.\n", rc);
4784 0 : }
4785 :
4786 : /* FALLTHROUGH */
4787 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
4788 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4789 0 : break;
4790 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
4791 0 : NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "No action for nvme controller timeout.\n");
4792 0 : break;
4793 : default:
4794 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "An invalid timeout action value is found.\n");
4795 0 : break;
4796 : }
4797 0 : }
4798 :
4799 : static struct nvme_ns *
4800 52 : nvme_ns_alloc(void)
4801 : {
4802 : struct nvme_ns *nvme_ns;
4803 :
4804 52 : nvme_ns = calloc(1, sizeof(struct nvme_ns));
4805 52 : if (nvme_ns == NULL) {
4806 0 : return NULL;
4807 : }
4808 :
4809 52 : if (g_opts.io_path_stat) {
4810 0 : nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
4811 0 : if (nvme_ns->stat == NULL) {
4812 0 : free(nvme_ns);
4813 0 : return NULL;
4814 : }
4815 0 : spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
4816 0 : }
4817 :
4818 52 : return nvme_ns;
4819 52 : }
4820 :
4821 : static void
4822 52 : nvme_ns_free(struct nvme_ns *nvme_ns)
4823 : {
4824 52 : free(nvme_ns->stat);
4825 52 : free(nvme_ns);
4826 52 : }
4827 :
4828 : static void
4829 52 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
4830 : {
4831 52 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4832 52 : struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
4833 :
4834 52 : if (rc == 0) {
4835 50 : nvme_ns->probe_ctx = NULL;
4836 50 : nvme_ctrlr_get_ref(nvme_ctrlr);
4837 50 : } else {
4838 2 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4839 2 : nvme_ns_free(nvme_ns);
4840 : }
4841 :
4842 52 : if (ctx) {
4843 51 : ctx->populates_in_progress--;
4844 51 : if (ctx->populates_in_progress == 0) {
4845 12 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
4846 12 : }
4847 51 : }
4848 52 : }
4849 :
/*
 * Per-channel iterator step: add an I/O path for the namespace passed in ctx
 * to nbdev_ch. The result is handed to the iterator so that it can decide
 * how to proceed.
 */
static void
bdev_nvme_add_io_path(struct nvme_bdev_channel_iter *i,
		      struct nvme_bdev *nbdev,
		      struct nvme_bdev_channel *nbdev_ch, void *ctx)
{
	struct nvme_ns *target_ns = ctx;
	int status = _bdev_nvme_add_io_path(nbdev_ch, target_ns);

	if (status != 0) {
		SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
	}

	nvme_bdev_for_each_channel_continue(i, status);
}
4865 :
4866 : static void
4867 2 : bdev_nvme_delete_io_path(struct nvme_bdev_channel_iter *i,
4868 : struct nvme_bdev *nbdev,
4869 : struct nvme_bdev_channel *nbdev_ch, void *ctx)
4870 : {
4871 2 : struct nvme_ns *nvme_ns = ctx;
4872 : struct nvme_io_path *io_path;
4873 :
4874 2 : io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
4875 2 : if (io_path != NULL) {
4876 2 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
4877 2 : }
4878 :
4879 2 : nvme_bdev_for_each_channel_continue(i, 0);
4880 2 : }
4881 :
/*
 * Completion of the rollback iteration started when adding I/O paths failed:
 * report the namespace (passed in ctx) as failed to populate.
 */
static void
bdev_nvme_add_io_path_failed(struct nvme_bdev *nbdev, void *ctx, int status)
{
	struct nvme_ns *failed_ns = ctx;
	const int populate_rc = -1;

	nvme_ctrlr_populate_namespace_done(failed_ns, populate_rc);
}
4889 :
4890 : static void
4891 12 : bdev_nvme_add_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
4892 : {
4893 12 : struct nvme_ns *nvme_ns = ctx;
4894 :
4895 12 : if (status == 0) {
4896 12 : nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
4897 12 : } else {
4898 : /* Delete the added io_paths and fail populating the namespace. */
4899 0 : nvme_bdev_for_each_channel(nbdev,
4900 : bdev_nvme_delete_io_path,
4901 0 : nvme_ns,
4902 : bdev_nvme_add_io_path_failed);
4903 : }
4904 12 : }
4905 :
4906 : static int
4907 13 : nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
4908 : {
4909 : struct nvme_ns *tmp_ns;
4910 : const struct spdk_nvme_ns_data *nsdata;
4911 :
4912 13 : nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
4913 13 : if (!nsdata->nmic.can_share) {
4914 0 : SPDK_ERRLOG("Namespace cannot be shared.\n");
4915 0 : return -EINVAL;
4916 : }
4917 :
4918 13 : pthread_mutex_lock(&bdev->mutex);
4919 :
4920 13 : tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
4921 13 : assert(tmp_ns != NULL);
4922 :
4923 13 : if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
4924 1 : pthread_mutex_unlock(&bdev->mutex);
4925 1 : SPDK_ERRLOG("Namespaces are not identical.\n");
4926 1 : return -EINVAL;
4927 : }
4928 :
4929 12 : bdev->ref++;
4930 12 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4931 12 : nvme_ns->bdev = bdev;
4932 :
4933 12 : pthread_mutex_unlock(&bdev->mutex);
4934 :
4935 : /* Add nvme_io_path to nvme_bdev_channels dynamically. */
4936 24 : nvme_bdev_for_each_channel(bdev,
4937 : bdev_nvme_add_io_path,
4938 12 : nvme_ns,
4939 : bdev_nvme_add_io_path_done);
4940 :
4941 12 : return 0;
4942 13 : }
4943 :
4944 : static void
4945 52 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4946 : {
4947 : struct spdk_nvme_ns *ns;
4948 : struct nvme_bdev *bdev;
4949 52 : int rc = 0;
4950 :
4951 52 : ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
4952 52 : if (!ns) {
4953 0 : NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "Invalid NS %d\n", nvme_ns->id);
4954 0 : rc = -EINVAL;
4955 0 : goto done;
4956 : }
4957 :
4958 52 : nvme_ns->ns = ns;
4959 52 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
4960 :
4961 52 : if (nvme_ctrlr->ana_log_page != NULL) {
4962 38 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
4963 38 : }
4964 :
4965 52 : bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
4966 92 : if (bdev == NULL) {
4967 39 : rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
4968 39 : } else {
4969 13 : rc = nvme_bdev_add_ns(bdev, nvme_ns);
4970 13 : if (rc == 0) {
4971 12 : return;
4972 : }
4973 : }
4974 : done:
4975 40 : nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
4976 52 : }
4977 :
4978 : static void
4979 50 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
4980 : {
4981 50 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4982 :
4983 50 : assert(nvme_ctrlr != NULL);
4984 :
4985 50 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4986 :
4987 50 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4988 :
4989 50 : if (nvme_ns->bdev != NULL) {
4990 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4991 0 : return;
4992 : }
4993 :
4994 50 : nvme_ns_free(nvme_ns);
4995 50 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4996 :
4997 50 : nvme_ctrlr_put_ref(nvme_ctrlr);
4998 50 : }
4999 :
/*
 * Completion of the iteration that deletes the I/O paths of a namespace from
 * every channel: finish depopulating the namespace passed in ctx.
 */
static void
bdev_nvme_delete_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
{
	struct nvme_ns *removed_ns = ctx;

	nvme_ctrlr_depopulate_namespace_done(removed_ns);
}
5007 :
5008 : static void
5009 50 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
5010 : {
5011 : struct nvme_bdev *bdev;
5012 :
5013 50 : spdk_poller_unregister(&nvme_ns->anatt_timer);
5014 :
5015 50 : bdev = nvme_ns->bdev;
5016 50 : if (bdev != NULL) {
5017 46 : pthread_mutex_lock(&bdev->mutex);
5018 :
5019 46 : assert(bdev->ref > 0);
5020 46 : bdev->ref--;
5021 46 : if (bdev->ref == 0) {
5022 35 : pthread_mutex_unlock(&bdev->mutex);
5023 :
5024 35 : spdk_bdev_unregister(&bdev->disk, NULL, NULL);
5025 35 : } else {
5026 : /* spdk_bdev_unregister() is not called until the last nvme_ns is
5027 : * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
5028 : * and clear nvme_ns->bdev here.
5029 : */
5030 11 : TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
5031 11 : nvme_ns->bdev = NULL;
5032 :
5033 11 : pthread_mutex_unlock(&bdev->mutex);
5034 :
5035 : /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
5036 : * we call depopulate_namespace_done() to avoid use-after-free.
5037 : */
5038 22 : nvme_bdev_for_each_channel(bdev,
5039 : bdev_nvme_delete_io_path,
5040 11 : nvme_ns,
5041 : bdev_nvme_delete_io_path_done);
5042 11 : return;
5043 : }
5044 35 : }
5045 :
5046 39 : nvme_ctrlr_depopulate_namespace_done(nvme_ns);
5047 50 : }
5048 :
5049 : static void
5050 68 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
5051 : struct nvme_async_probe_ctx *ctx)
5052 : {
5053 68 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5054 : struct nvme_ns *nvme_ns, *next;
5055 : struct spdk_nvme_ns *ns;
5056 : struct nvme_bdev *bdev;
5057 : uint32_t nsid;
5058 : int rc;
5059 : uint64_t num_sectors;
5060 :
5061 68 : if (ctx) {
5062 : /* Initialize this count to 1 to handle the populate functions
5063 : * calling nvme_ctrlr_populate_namespace_done() immediately.
5064 : */
5065 52 : ctx->populates_in_progress = 1;
5066 52 : }
5067 :
5068 : /* First loop over our existing namespaces and see if they have been
5069 : * removed. */
5070 68 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5071 72 : while (nvme_ns != NULL) {
5072 4 : next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5073 :
5074 4 : if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
5075 : /* NS is still there or added again. Its attributes may have changed. */
5076 3 : ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
5077 3 : if (nvme_ns->ns != ns) {
5078 1 : assert(nvme_ns->ns == NULL);
5079 1 : nvme_ns->ns = ns;
5080 1 : NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "NSID %u was added\n", nvme_ns->id);
5081 1 : }
5082 :
5083 3 : num_sectors = spdk_nvme_ns_get_num_sectors(ns);
5084 3 : bdev = nvme_ns->bdev;
5085 3 : assert(bdev != NULL);
5086 3 : if (bdev->disk.blockcnt != num_sectors) {
5087 1 : NVME_CTRLR_NOTICELOG(nvme_ctrlr,
5088 : "NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
5089 : nvme_ns->id,
5090 : bdev->disk.name,
5091 : bdev->disk.blockcnt,
5092 : num_sectors);
5093 1 : rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
5094 1 : if (rc != 0) {
5095 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr,
5096 : "Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
5097 : bdev->disk.name, rc);
5098 0 : }
5099 1 : }
5100 3 : } else {
5101 : /* Namespace was removed */
5102 1 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
5103 : }
5104 :
5105 4 : nvme_ns = next;
5106 : }
5107 :
5108 : /* Loop through all of the namespaces at the nvme level and see if any of them are new */
5109 68 : nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
5110 123 : while (nsid != 0) {
5111 55 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
5112 :
5113 55 : if (nvme_ns == NULL) {
5114 : /* Found a new one */
5115 52 : nvme_ns = nvme_ns_alloc();
5116 52 : if (nvme_ns == NULL) {
5117 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate namespace\n");
5118 : /* This just fails to attach the namespace. It may work on a future attempt. */
5119 0 : continue;
5120 : }
5121 :
5122 52 : nvme_ns->id = nsid;
5123 52 : nvme_ns->ctrlr = nvme_ctrlr;
5124 :
5125 52 : nvme_ns->bdev = NULL;
5126 :
5127 52 : if (ctx) {
5128 51 : ctx->populates_in_progress++;
5129 51 : }
5130 52 : nvme_ns->probe_ctx = ctx;
5131 :
5132 52 : RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
5133 :
5134 52 : nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
5135 52 : }
5136 :
5137 55 : nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
5138 : }
5139 :
5140 68 : if (ctx) {
5141 : /* Decrement this count now that the loop is over to account
5142 : * for the one we started with. If the count is then 0, we
5143 : * know any populate_namespace functions completed immediately,
5144 : * so we'll kick the callback here.
5145 : */
5146 52 : ctx->populates_in_progress--;
5147 52 : if (ctx->populates_in_progress == 0) {
5148 40 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
5149 40 : }
5150 52 : }
5151 :
5152 68 : }
5153 :
5154 : static void
5155 67 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
5156 : {
5157 : struct nvme_ns *nvme_ns, *tmp;
5158 :
5159 116 : RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
5160 49 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
5161 49 : }
5162 67 : }
5163 :
5164 : static uint32_t
5165 42 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
5166 : {
5167 42 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5168 : const struct spdk_nvme_ctrlr_data *cdata;
5169 42 : uint32_t nsid, ns_count = 0;
5170 :
5171 42 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5172 :
5173 87 : for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
5174 87 : nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
5175 45 : ns_count++;
5176 45 : }
5177 :
5178 84 : return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
5179 42 : sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
5180 : sizeof(uint32_t);
5181 : }
5182 :
5183 : static int
5184 7 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
5185 : void *cb_arg)
5186 : {
5187 7 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
5188 : struct nvme_ns *nvme_ns;
5189 : uint32_t i, nsid;
5190 :
5191 13 : for (i = 0; i < desc->num_of_nsid; i++) {
5192 6 : nsid = desc->nsid[i];
5193 6 : if (nsid == 0) {
5194 0 : continue;
5195 : }
5196 :
5197 6 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
5198 :
5199 6 : if (nvme_ns == NULL) {
5200 : /* Target told us that an inactive namespace had an ANA change */
5201 1 : continue;
5202 : }
5203 :
5204 5 : _nvme_ns_set_ana_state(nvme_ns, desc);
5205 5 : }
5206 :
5207 7 : return 0;
5208 : }
5209 :
5210 : static void
5211 0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
5212 : {
5213 : struct nvme_ns *nvme_ns;
5214 :
5215 0 : spdk_free(nvme_ctrlr->ana_log_page);
5216 0 : nvme_ctrlr->ana_log_page = NULL;
5217 :
5218 0 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5219 0 : nvme_ns != NULL;
5220 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
5221 0 : nvme_ns->ana_state_updating = false;
5222 0 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
5223 0 : }
5224 0 : }
5225 :
5226 : static void
5227 3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
5228 : {
5229 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
5230 :
5231 3 : if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
5232 6 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
5233 3 : nvme_ctrlr);
5234 3 : } else {
5235 0 : bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
5236 : }
5237 :
5238 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5239 :
5240 3 : assert(nvme_ctrlr->ana_log_page_updating == true);
5241 3 : nvme_ctrlr->ana_log_page_updating = false;
5242 :
5243 3 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
5244 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5245 :
5246 0 : nvme_ctrlr_unregister(nvme_ctrlr);
5247 0 : } else {
5248 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5249 :
5250 3 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
5251 : }
5252 3 : }
5253 :
5254 : static int
5255 6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
5256 : {
5257 : uint32_t ana_log_page_size;
5258 : int rc;
5259 :
5260 6 : if (nvme_ctrlr->ana_log_page == NULL) {
5261 0 : return -EINVAL;
5262 : }
5263 :
5264 6 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
5265 :
5266 6 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
5267 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr,
5268 : "ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
5269 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
5270 0 : return -EINVAL;
5271 : }
5272 :
5273 6 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5274 6 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
5275 5 : nvme_ctrlr->ana_log_page_updating) {
5276 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5277 3 : return -EBUSY;
5278 : }
5279 :
5280 3 : nvme_ctrlr->ana_log_page_updating = true;
5281 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5282 :
5283 6 : rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
5284 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
5285 : SPDK_NVME_GLOBAL_NS_TAG,
5286 3 : nvme_ctrlr->ana_log_page,
5287 3 : ana_log_page_size, 0,
5288 : nvme_ctrlr_read_ana_log_page_done,
5289 3 : nvme_ctrlr);
5290 3 : if (rc != 0) {
5291 0 : nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
5292 0 : }
5293 :
5294 3 : return rc;
5295 6 : }
5296 :
5297 : static void
5298 0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
5299 : {
5300 0 : }
5301 :
5302 : struct bdev_nvme_set_preferred_path_ctx {
5303 : struct spdk_bdev_desc *desc;
5304 : struct nvme_ns *nvme_ns;
5305 : bdev_nvme_set_preferred_path_cb cb_fn;
5306 : void *cb_arg;
5307 : };
5308 :
5309 : static void
5310 3 : bdev_nvme_set_preferred_path_done(struct nvme_bdev *nbdev, void *_ctx, int status)
5311 : {
5312 3 : struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
5313 :
5314 3 : assert(ctx != NULL);
5315 3 : assert(ctx->desc != NULL);
5316 3 : assert(ctx->cb_fn != NULL);
5317 :
5318 3 : spdk_bdev_close(ctx->desc);
5319 :
5320 3 : ctx->cb_fn(ctx->cb_arg, status);
5321 :
5322 3 : free(ctx);
5323 3 : }
5324 :
5325 : static void
5326 2 : _bdev_nvme_set_preferred_path(struct nvme_bdev_channel_iter *i,
5327 : struct nvme_bdev *nbdev,
5328 : struct nvme_bdev_channel *nbdev_ch, void *_ctx)
5329 : {
5330 2 : struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
5331 : struct nvme_io_path *io_path, *prev;
5332 :
5333 2 : prev = NULL;
5334 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
5335 3 : if (io_path->nvme_ns == ctx->nvme_ns) {
5336 2 : break;
5337 : }
5338 1 : prev = io_path;
5339 1 : }
5340 :
5341 2 : if (io_path != NULL) {
5342 2 : if (prev != NULL) {
5343 1 : STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
5344 1 : STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
5345 1 : }
5346 :
5347 : /* We can set io_path to nbdev_ch->current_io_path directly here.
5348 : * However, it needs to be conditional. To simplify the code,
5349 : * just clear nbdev_ch->current_io_path and let find_io_path()
5350 : * fill it.
5351 : *
5352 : * Automatic failback may be disabled. Hence even if the io_path is
5353 : * already at the head, clear nbdev_ch->current_io_path.
5354 : */
5355 2 : bdev_nvme_clear_current_io_path(nbdev_ch);
5356 2 : }
5357 :
5358 2 : nvme_bdev_for_each_channel_continue(i, 0);
5359 2 : }
5360 :
5361 : static struct nvme_ns *
5362 3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
5363 : {
5364 : struct nvme_ns *nvme_ns, *prev;
5365 : const struct spdk_nvme_ctrlr_data *cdata;
5366 :
5367 3 : prev = NULL;
5368 6 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
5369 6 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
5370 :
5371 6 : if (cdata->cntlid == cntlid) {
5372 3 : break;
5373 : }
5374 3 : prev = nvme_ns;
5375 3 : }
5376 :
5377 3 : if (nvme_ns != NULL && prev != NULL) {
5378 2 : TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
5379 2 : TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
5380 2 : }
5381 :
5382 3 : return nvme_ns;
5383 : }
5384 :
5385 : /* This function supports only multipath mode. There is only a single I/O path
5386 : * for each NVMe-oF controller. Hence, just move the matched I/O path to the
5387 : * head of the I/O path list for each NVMe bdev channel.
5388 : *
5389 : * NVMe bdev channel may be acquired after completing this function. move the
5390 : * matched namespace to the head of the namespace list for the NVMe bdev too.
5391 : */
5392 : void
5393 3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
5394 : bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
5395 : {
5396 : struct bdev_nvme_set_preferred_path_ctx *ctx;
5397 : struct spdk_bdev *bdev;
5398 : struct nvme_bdev *nbdev;
5399 3 : int rc = 0;
5400 :
5401 3 : assert(cb_fn != NULL);
5402 :
5403 3 : ctx = calloc(1, sizeof(*ctx));
5404 3 : if (ctx == NULL) {
5405 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5406 0 : rc = -ENOMEM;
5407 0 : goto err_alloc;
5408 : }
5409 :
5410 3 : ctx->cb_fn = cb_fn;
5411 3 : ctx->cb_arg = cb_arg;
5412 :
5413 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5414 3 : if (rc != 0) {
5415 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5416 0 : goto err_open;
5417 : }
5418 :
5419 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5420 :
5421 3 : if (bdev->module != &nvme_if) {
5422 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5423 0 : rc = -ENODEV;
5424 0 : goto err_bdev;
5425 : }
5426 :
5427 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5428 :
5429 3 : pthread_mutex_lock(&nbdev->mutex);
5430 :
5431 3 : ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
5432 3 : if (ctx->nvme_ns == NULL) {
5433 0 : pthread_mutex_unlock(&nbdev->mutex);
5434 :
5435 0 : SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
5436 0 : rc = -ENODEV;
5437 0 : goto err_bdev;
5438 : }
5439 :
5440 3 : pthread_mutex_unlock(&nbdev->mutex);
5441 :
5442 6 : nvme_bdev_for_each_channel(nbdev,
5443 : _bdev_nvme_set_preferred_path,
5444 3 : ctx,
5445 : bdev_nvme_set_preferred_path_done);
5446 3 : return;
5447 :
5448 : err_bdev:
5449 0 : spdk_bdev_close(ctx->desc);
5450 : err_open:
5451 0 : free(ctx);
5452 : err_alloc:
5453 0 : cb_fn(cb_arg, rc);
5454 3 : }
5455 :
5456 : struct bdev_nvme_set_multipath_policy_ctx {
5457 : struct spdk_bdev_desc *desc;
5458 : spdk_bdev_nvme_set_multipath_policy_cb cb_fn;
5459 : void *cb_arg;
5460 : };
5461 :
5462 : static void
5463 3 : bdev_nvme_set_multipath_policy_done(struct nvme_bdev *nbdev, void *_ctx, int status)
5464 : {
5465 3 : struct bdev_nvme_set_multipath_policy_ctx *ctx = _ctx;
5466 :
5467 3 : assert(ctx != NULL);
5468 3 : assert(ctx->desc != NULL);
5469 3 : assert(ctx->cb_fn != NULL);
5470 :
5471 3 : spdk_bdev_close(ctx->desc);
5472 :
5473 3 : ctx->cb_fn(ctx->cb_arg, status);
5474 :
5475 3 : free(ctx);
5476 3 : }
5477 :
5478 : static void
5479 1 : _bdev_nvme_set_multipath_policy(struct nvme_bdev_channel_iter *i,
5480 : struct nvme_bdev *nbdev,
5481 : struct nvme_bdev_channel *nbdev_ch, void *ctx)
5482 : {
5483 1 : nbdev_ch->mp_policy = nbdev->mp_policy;
5484 1 : nbdev_ch->mp_selector = nbdev->mp_selector;
5485 1 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
5486 1 : bdev_nvme_clear_current_io_path(nbdev_ch);
5487 :
5488 1 : nvme_bdev_for_each_channel_continue(i, 0);
5489 1 : }
5490 :
5491 : void
5492 3 : spdk_bdev_nvme_set_multipath_policy(const char *name, enum spdk_bdev_nvme_multipath_policy policy,
5493 : enum spdk_bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
5494 : spdk_bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
5495 : {
5496 : struct bdev_nvme_set_multipath_policy_ctx *ctx;
5497 : struct spdk_bdev *bdev;
5498 : struct nvme_bdev *nbdev;
5499 : int rc;
5500 :
5501 3 : assert(cb_fn != NULL);
5502 :
5503 3 : switch (policy) {
5504 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
5505 1 : break;
5506 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
5507 2 : switch (selector) {
5508 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
5509 1 : if (rr_min_io == UINT32_MAX) {
5510 0 : rr_min_io = 1;
5511 1 : } else if (rr_min_io == 0) {
5512 0 : rc = -EINVAL;
5513 0 : goto exit;
5514 : }
5515 1 : break;
5516 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
5517 1 : break;
5518 : default:
5519 0 : rc = -EINVAL;
5520 0 : goto exit;
5521 : }
5522 2 : break;
5523 : default:
5524 0 : rc = -EINVAL;
5525 0 : goto exit;
5526 : }
5527 :
5528 3 : ctx = calloc(1, sizeof(*ctx));
5529 3 : if (ctx == NULL) {
5530 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5531 0 : rc = -ENOMEM;
5532 0 : goto exit;
5533 : }
5534 :
5535 3 : ctx->cb_fn = cb_fn;
5536 3 : ctx->cb_arg = cb_arg;
5537 :
5538 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5539 3 : if (rc != 0) {
5540 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5541 0 : rc = -ENODEV;
5542 0 : goto err_open;
5543 : }
5544 :
5545 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5546 3 : if (bdev->module != &nvme_if) {
5547 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5548 0 : rc = -ENODEV;
5549 0 : goto err_module;
5550 : }
5551 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5552 :
5553 3 : pthread_mutex_lock(&nbdev->mutex);
5554 3 : nbdev->mp_policy = policy;
5555 3 : nbdev->mp_selector = selector;
5556 3 : nbdev->rr_min_io = rr_min_io;
5557 3 : pthread_mutex_unlock(&nbdev->mutex);
5558 :
5559 6 : nvme_bdev_for_each_channel(nbdev,
5560 : _bdev_nvme_set_multipath_policy,
5561 3 : ctx,
5562 : bdev_nvme_set_multipath_policy_done);
5563 3 : return;
5564 :
5565 : err_module:
5566 0 : spdk_bdev_close(ctx->desc);
5567 : err_open:
5568 0 : free(ctx);
5569 : exit:
5570 0 : cb_fn(cb_arg, rc);
5571 3 : }
5572 :
5573 : static void
5574 3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
5575 : {
5576 3 : struct nvme_ctrlr *nvme_ctrlr = arg;
5577 : union spdk_nvme_async_event_completion event;
5578 :
5579 3 : if (spdk_nvme_cpl_is_error(cpl)) {
5580 0 : SPDK_WARNLOG("AER request execute failed\n");
5581 0 : return;
5582 : }
5583 :
5584 3 : event.raw = cpl->cdw0;
5585 3 : if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5586 3 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
5587 2 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
5588 3 : } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5589 1 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
5590 1 : nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
5591 1 : }
5592 3 : }
5593 :
5594 : static void
5595 58 : free_nvme_async_probe_ctx(struct nvme_async_probe_ctx *ctx)
5596 : {
5597 58 : spdk_keyring_put_key(ctx->drv_opts.tls_psk);
5598 58 : spdk_keyring_put_key(ctx->drv_opts.dhchap_key);
5599 58 : spdk_keyring_put_key(ctx->drv_opts.dhchap_ctrlr_key);
5600 58 : free(ctx);
5601 58 : }
5602 :
5603 : static void
5604 58 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
5605 : {
5606 58 : if (ctx->cb_fn) {
5607 58 : ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
5608 58 : }
5609 :
5610 58 : ctx->namespaces_populated = true;
5611 58 : if (ctx->probe_done) {
5612 : /* The probe was already completed, so we need to free the context
5613 : * here. This can happen for cases like OCSSD, where we need to
5614 : * send additional commands to the SSD after attach.
5615 : */
5616 37 : free_nvme_async_probe_ctx(ctx);
5617 37 : }
5618 58 : }
5619 :
5620 : static int
5621 20 : bdev_nvme_remove_poller(void *ctx)
5622 : {
5623 : struct spdk_nvme_transport_id trid_pcie;
5624 :
5625 20 : if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
5626 1 : spdk_poller_unregister(&g_hotplug_poller);
5627 1 : return SPDK_POLLER_IDLE;
5628 : }
5629 :
5630 19 : memset(&trid_pcie, 0, sizeof(trid_pcie));
5631 19 : spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
5632 :
5633 19 : if (spdk_nvme_scan_attached(&trid_pcie)) {
5634 0 : SPDK_ERRLOG_RATELIMIT("spdk_nvme_scan_attached() failed\n");
5635 0 : }
5636 :
5637 19 : return SPDK_POLLER_BUSY;
5638 20 : }
5639 :
5640 : static void
5641 66 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
5642 : struct nvme_async_probe_ctx *ctx)
5643 : {
5644 66 : struct spdk_nvme_transport_id *trid = &nvme_ctrlr->active_path_id->trid;
5645 :
5646 66 : if (spdk_nvme_trtype_is_fabrics(trid->trtype)) {
5647 66 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was created to %s:%s\n",
5648 : trid->traddr, trid->trsvcid);
5649 66 : } else {
5650 0 : NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was created\n");
5651 : }
5652 :
5653 132 : spdk_io_device_register(nvme_ctrlr,
5654 : bdev_nvme_create_ctrlr_channel_cb,
5655 : bdev_nvme_destroy_ctrlr_channel_cb,
5656 : sizeof(struct nvme_ctrlr_channel),
5657 66 : nvme_ctrlr->nbdev_ctrlr->name);
5658 :
5659 66 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
5660 :
5661 66 : if (g_hotplug_poller == NULL) {
5662 2 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
5663 : NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
5664 2 : }
5665 66 : }
5666 :
5667 : static void
5668 36 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
5669 : {
5670 36 : struct nvme_ctrlr *nvme_ctrlr = _ctx;
5671 36 : struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
5672 :
5673 36 : nvme_ctrlr->probe_ctx = NULL;
5674 :
5675 36 : if (spdk_nvme_cpl_is_error(cpl)) {
5676 0 : nvme_ctrlr_delete(nvme_ctrlr);
5677 :
5678 0 : if (ctx != NULL) {
5679 0 : ctx->reported_bdevs = 0;
5680 0 : populate_namespaces_cb(ctx, -1);
5681 0 : }
5682 0 : return;
5683 : }
5684 :
5685 36 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5686 36 : }
5687 :
5688 : static int
5689 36 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
5690 : struct nvme_async_probe_ctx *ctx)
5691 : {
5692 36 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5693 : const struct spdk_nvme_ctrlr_data *cdata;
5694 : uint32_t ana_log_page_size;
5695 :
5696 36 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5697 :
5698 : /* Set buffer size enough to include maximum number of allowed namespaces. */
5699 72 : ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
5700 36 : sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
5701 : sizeof(uint32_t);
5702 :
5703 36 : nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
5704 : SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
5705 36 : if (nvme_ctrlr->ana_log_page == NULL) {
5706 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "could not allocate ANA log page buffer\n");
5707 0 : return -ENXIO;
5708 : }
5709 :
5710 : /* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned.
5711 : * Hence copy each descriptor to a temporary area when parsing it.
5712 : *
5713 : * Allocate a buffer whose size is as large as ANA log page buffer because
5714 : * we do not know the size of a descriptor until actually reading it.
5715 : */
5716 36 : nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
5717 36 : if (nvme_ctrlr->copied_ana_desc == NULL) {
5718 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "could not allocate a buffer to parse ANA descriptor\n");
5719 0 : return -ENOMEM;
5720 : }
5721 :
5722 36 : nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
5723 :
5724 36 : nvme_ctrlr->probe_ctx = ctx;
5725 :
5726 : /* Then, set the read size only to include the current active namespaces. */
5727 36 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
5728 :
5729 36 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
5730 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
5731 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
5732 0 : return -EINVAL;
5733 : }
5734 :
5735 72 : return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
5736 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
5737 : SPDK_NVME_GLOBAL_NS_TAG,
5738 36 : nvme_ctrlr->ana_log_page,
5739 36 : ana_log_page_size, 0,
5740 : nvme_ctrlr_init_ana_log_page_done,
5741 36 : nvme_ctrlr);
5742 36 : }
5743 :
5744 : /* hostnqn and subnqn were already verified before attaching a controller.
5745 : * Hence check only the multipath capability and cntlid here.
5746 : */
5747 : static bool
5748 19 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
5749 : {
5750 : struct nvme_ctrlr *tmp;
5751 : const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
5752 :
5753 19 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5754 :
5755 19 : if (!cdata->cmic.multi_ctrlr) {
5756 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5757 0 : return false;
5758 : }
5759 :
5760 40 : TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
5761 22 : tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
5762 :
5763 22 : if (!tmp_cdata->cmic.multi_ctrlr) {
5764 0 : NVME_CTRLR_ERRLOG(tmp, "Ctrlr%u does not support multipath.\n", cdata->cntlid);
5765 0 : return false;
5766 : }
5767 22 : if (cdata->cntlid == tmp_cdata->cntlid) {
5768 1 : NVME_CTRLR_ERRLOG(tmp, "cntlid %u are duplicated.\n", tmp_cdata->cntlid);
5769 1 : return false;
5770 : }
5771 21 : }
5772 :
5773 18 : return true;
5774 19 : }
5775 :
5776 :
5777 : static int
5778 67 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
5779 : {
5780 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
5781 67 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5782 : struct nvme_ctrlr *nctrlr;
5783 67 : int rc = 0;
5784 :
5785 67 : pthread_mutex_lock(&g_bdev_nvme_mutex);
5786 :
5787 67 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
5788 67 : if (nbdev_ctrlr != NULL) {
5789 19 : if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
5790 1 : rc = -EINVAL;
5791 1 : goto exit;
5792 : }
5793 39 : TAILQ_FOREACH(nctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
5794 21 : if (nctrlr->opts.multipath != nvme_ctrlr->opts.multipath) {
5795 : /* All controllers with the same name must be configured the same
5796 : * way, either for multipath or failover. If the configuration doesn't
5797 : * match - report error.
5798 : */
5799 0 : rc = -EINVAL;
5800 0 : goto exit;
5801 : }
5802 21 : }
5803 18 : } else {
5804 48 : nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
5805 48 : if (nbdev_ctrlr == NULL) {
5806 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate nvme_bdev_ctrlr.\n");
5807 0 : rc = -ENOMEM;
5808 0 : goto exit;
5809 : }
5810 48 : nbdev_ctrlr->name = strdup(name);
5811 48 : if (nbdev_ctrlr->name == NULL) {
5812 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate name of nvme_bdev_ctrlr.\n");
5813 0 : free(nbdev_ctrlr);
5814 0 : goto exit;
5815 : }
5816 48 : TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
5817 48 : TAILQ_INIT(&nbdev_ctrlr->bdevs);
5818 48 : TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
5819 : }
5820 66 : nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
5821 66 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
5822 : exit:
5823 67 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
5824 67 : return rc;
5825 : }
5826 :
5827 : static int
5828 67 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
5829 : const char *name,
5830 : const struct spdk_nvme_transport_id *trid,
5831 : struct nvme_async_probe_ctx *ctx)
5832 : {
5833 : struct nvme_ctrlr *nvme_ctrlr;
5834 : struct nvme_path_id *path_id;
5835 : const struct spdk_nvme_ctrlr_data *cdata;
5836 67 : struct spdk_event_handler_opts opts = {
5837 : .opts_size = SPDK_SIZEOF(&opts, fd_type),
5838 : };
5839 : uint64_t period;
5840 : int fd, rc;
5841 :
5842 67 : nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
5843 67 : if (nvme_ctrlr == NULL) {
5844 0 : SPDK_ERRLOG("Failed to allocate device struct\n");
5845 0 : return -ENOMEM;
5846 : }
5847 :
5848 67 : rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
5849 67 : if (rc != 0) {
5850 0 : free(nvme_ctrlr);
5851 0 : return rc;
5852 : }
5853 :
5854 67 : TAILQ_INIT(&nvme_ctrlr->trids);
5855 67 : TAILQ_INIT(&nvme_ctrlr->pending_resets);
5856 67 : RB_INIT(&nvme_ctrlr->namespaces);
5857 :
5858 : /* Get another reference to the key, so the first one can be released from probe_ctx */
5859 67 : if (ctx != NULL) {
5860 53 : if (ctx->drv_opts.tls_psk != NULL) {
5861 0 : nvme_ctrlr->psk = spdk_keyring_get_key(
5862 0 : spdk_key_get_name(ctx->drv_opts.tls_psk));
5863 0 : if (nvme_ctrlr->psk == NULL) {
5864 : /* Could only happen if the key was removed in the meantime */
5865 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5866 : spdk_key_get_name(ctx->drv_opts.tls_psk));
5867 0 : rc = -ENOKEY;
5868 0 : goto err;
5869 : }
5870 0 : }
5871 :
5872 53 : if (ctx->drv_opts.dhchap_key != NULL) {
5873 0 : nvme_ctrlr->dhchap_key = spdk_keyring_get_key(
5874 0 : spdk_key_get_name(ctx->drv_opts.dhchap_key));
5875 0 : if (nvme_ctrlr->dhchap_key == NULL) {
5876 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5877 : spdk_key_get_name(ctx->drv_opts.dhchap_key));
5878 0 : rc = -ENOKEY;
5879 0 : goto err;
5880 : }
5881 0 : }
5882 :
5883 53 : if (ctx->drv_opts.dhchap_ctrlr_key != NULL) {
5884 0 : nvme_ctrlr->dhchap_ctrlr_key =
5885 0 : spdk_keyring_get_key(
5886 0 : spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
5887 0 : if (nvme_ctrlr->dhchap_ctrlr_key == NULL) {
5888 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5889 : spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
5890 0 : rc = -ENOKEY;
5891 0 : goto err;
5892 : }
5893 0 : }
5894 53 : }
5895 :
5896 : /* Check if we manage to enable interrupts on the controller. */
5897 67 : if (spdk_interrupt_mode_is_enabled() && ctx != NULL && !ctx->drv_opts.enable_interrupts) {
5898 0 : SPDK_ERRLOG("Failed to enable interrupts on the controller\n");
5899 0 : rc = -ENOTSUP;
5900 0 : goto err;
5901 : }
5902 :
5903 67 : path_id = calloc(1, sizeof(*path_id));
5904 67 : if (path_id == NULL) {
5905 0 : SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
5906 0 : rc = -ENOMEM;
5907 0 : goto err;
5908 : }
5909 :
5910 67 : path_id->trid = *trid;
5911 67 : if (ctx != NULL) {
5912 53 : memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
5913 53 : memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
5914 53 : }
5915 67 : nvme_ctrlr->active_path_id = path_id;
5916 67 : TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
5917 :
5918 67 : nvme_ctrlr->thread = spdk_get_thread();
5919 67 : nvme_ctrlr->ctrlr = ctrlr;
5920 67 : nvme_ctrlr->ref = 1;
5921 :
5922 67 : if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
5923 0 : SPDK_ERRLOG("OCSSDs are not supported");
5924 0 : rc = -ENOTSUP;
5925 0 : goto err;
5926 : }
5927 :
5928 67 : if (ctx != NULL) {
5929 53 : memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
5930 53 : } else {
5931 14 : spdk_bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
5932 : }
5933 :
5934 67 : period = spdk_interrupt_mode_is_enabled() ? 0 : g_opts.nvme_adminq_poll_period_us;
5935 :
5936 67 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
5937 : period);
5938 :
5939 67 : if (spdk_interrupt_mode_is_enabled()) {
5940 0 : spdk_poller_register_interrupt(nvme_ctrlr->adminq_timer_poller, NULL, NULL);
5941 :
5942 0 : fd = spdk_nvme_ctrlr_get_admin_qp_fd(nvme_ctrlr->ctrlr, &opts);
5943 0 : if (fd < 0) {
5944 0 : rc = fd;
5945 0 : goto err;
5946 : }
5947 :
5948 0 : nvme_ctrlr->intr = SPDK_INTERRUPT_REGISTER_EXT(fd, bdev_nvme_poll_adminq,
5949 : nvme_ctrlr, &opts);
5950 0 : if (!nvme_ctrlr->intr) {
5951 0 : rc = -EINVAL;
5952 0 : goto err;
5953 : }
5954 0 : }
5955 :
5956 67 : if (g_opts.timeout_us > 0) {
5957 : /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
5958 : /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
5959 0 : uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
5960 0 : g_opts.timeout_us : g_opts.timeout_admin_us;
5961 0 : spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
5962 0 : adm_timeout_us, timeout_cb, nvme_ctrlr);
5963 0 : }
5964 :
5965 67 : spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
5966 67 : spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
5967 :
5968 67 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
5969 : SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
5970 0 : nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
5971 0 : }
5972 :
5973 67 : rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
5974 67 : if (rc != 0) {
5975 1 : goto err;
5976 : }
5977 :
5978 66 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5979 :
5980 66 : if (cdata->cmic.ana_reporting) {
5981 36 : rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
5982 36 : if (rc == 0) {
5983 36 : return 0;
5984 : }
5985 0 : } else {
5986 30 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5987 30 : return 0;
5988 : }
5989 :
5990 : err:
5991 1 : nvme_ctrlr_delete(nvme_ctrlr);
5992 1 : return rc;
5993 67 : }
5994 :
5995 : void
5996 35 : spdk_bdev_nvme_get_default_ctrlr_opts(struct spdk_bdev_nvme_ctrlr_opts *opts)
5997 : {
5998 35 : opts->prchk_flags = 0;
5999 35 : opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
6000 35 : opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
6001 35 : opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
6002 35 : opts->multipath = true;
6003 35 : }
6004 :
6005 : static void
6006 0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6007 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
6008 : {
6009 : char *name;
6010 :
6011 0 : name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
6012 0 : if (!name) {
6013 0 : SPDK_ERRLOG("Failed to assign name to NVMe device\n");
6014 0 : return;
6015 : }
6016 :
6017 0 : if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
6018 0 : SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
6019 0 : } else {
6020 0 : SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
6021 : }
6022 :
6023 0 : free(name);
6024 0 : }
6025 :
/* Depopulate the controller's namespaces, then drop one reference to it.
 * ctx is the struct nvme_ctrlr to destruct.
 */
static void
_nvme_ctrlr_destruct(void *ctx)
{
	struct nvme_ctrlr *doomed = (struct nvme_ctrlr *)ctx;

	nvme_ctrlr_depopulate_namespaces(doomed);
	nvme_ctrlr_put_ref(doomed);
}
6034 :
6035 : static int
6036 63 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
6037 : {
6038 : struct nvme_probe_skip_entry *entry;
6039 :
6040 : /* The controller's destruction was already started */
6041 63 : if (nvme_ctrlr->destruct) {
6042 0 : return -EALREADY;
6043 : }
6044 :
6045 63 : if (!hotplug &&
6046 63 : nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
6047 0 : entry = calloc(1, sizeof(*entry));
6048 0 : if (!entry) {
6049 0 : return -ENOMEM;
6050 : }
6051 0 : entry->trid = nvme_ctrlr->active_path_id->trid;
6052 0 : TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
6053 0 : }
6054 :
6055 63 : nvme_ctrlr->destruct = true;
6056 63 : return 0;
6057 63 : }
6058 :
6059 : static int
6060 2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
6061 : {
6062 : int rc;
6063 :
6064 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
6065 2 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
6066 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6067 :
6068 2 : if (rc == 0) {
6069 2 : _nvme_ctrlr_destruct(nvme_ctrlr);
6070 2 : } else if (rc == -EALREADY) {
6071 0 : rc = 0;
6072 0 : }
6073 :
6074 2 : return rc;
6075 : }
6076 :
/* Remove callback registered through spdk_nvme_ctrlr_set_remove_cb().
 * cb_ctx is the struct nvme_ctrlr; it is deleted with hotplug set to true.
 * The result of the deletion is not checked.
 */
static void
remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_ctrlr *removed = (struct nvme_ctrlr *)cb_ctx;

	bdev_nvme_delete_ctrlr(removed, true);
}
6084 :
6085 : static int
6086 0 : bdev_nvme_hotplug_probe(void *arg)
6087 : {
6088 0 : if (g_hotplug_probe_ctx == NULL) {
6089 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
6090 0 : return SPDK_POLLER_IDLE;
6091 : }
6092 :
6093 0 : if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
6094 0 : g_hotplug_probe_ctx = NULL;
6095 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
6096 0 : }
6097 :
6098 0 : return SPDK_POLLER_BUSY;
6099 0 : }
6100 :
6101 : static int
6102 0 : bdev_nvme_hotplug(void *arg)
6103 : {
6104 : struct spdk_nvme_transport_id trid_pcie;
6105 :
6106 0 : if (g_hotplug_probe_ctx) {
6107 0 : return SPDK_POLLER_BUSY;
6108 : }
6109 :
6110 0 : memset(&trid_pcie, 0, sizeof(trid_pcie));
6111 0 : spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
6112 :
6113 0 : g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
6114 : hotplug_probe_cb, attach_cb, NULL);
6115 :
6116 0 : if (g_hotplug_probe_ctx) {
6117 0 : assert(g_hotplug_probe_poller == NULL);
6118 0 : g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
6119 0 : }
6120 :
6121 0 : return SPDK_POLLER_BUSY;
6122 0 : }
6123 :
6124 : void
6125 0 : bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
6126 : {
6127 0 : *opts = g_opts;
6128 0 : }
6129 :
6130 : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
6131 : uint32_t reconnect_delay_sec,
6132 : uint32_t fast_io_fail_timeout_sec);
6133 :
6134 : static int
6135 0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
6136 : {
6137 0 : if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
6138 : /* Can't set timeout_admin_us without also setting timeout_us */
6139 0 : SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
6140 0 : return -EINVAL;
6141 : }
6142 :
6143 0 : if (opts->bdev_retry_count < -1) {
6144 0 : SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
6145 0 : return -EINVAL;
6146 : }
6147 :
6148 0 : if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
6149 0 : opts->reconnect_delay_sec,
6150 0 : opts->fast_io_fail_timeout_sec)) {
6151 0 : return -EINVAL;
6152 : }
6153 :
6154 0 : return 0;
6155 0 : }
6156 :
6157 : int
6158 0 : bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
6159 : {
6160 : int ret;
6161 :
6162 0 : ret = bdev_nvme_validate_opts(opts);
6163 0 : if (ret) {
6164 0 : SPDK_WARNLOG("Failed to set nvme opts.\n");
6165 0 : return ret;
6166 : }
6167 :
6168 0 : if (g_bdev_nvme_init_thread != NULL) {
6169 0 : if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
6170 0 : return -EPERM;
6171 : }
6172 0 : }
6173 :
6174 0 : if (opts->rdma_srq_size != 0 ||
6175 0 : opts->rdma_max_cq_size != 0 ||
6176 0 : opts->rdma_cm_event_timeout_ms != 0) {
6177 : struct spdk_nvme_transport_opts drv_opts;
6178 :
6179 0 : spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
6180 0 : if (opts->rdma_srq_size != 0) {
6181 0 : drv_opts.rdma_srq_size = opts->rdma_srq_size;
6182 0 : }
6183 0 : if (opts->rdma_max_cq_size != 0) {
6184 0 : drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
6185 0 : }
6186 0 : if (opts->rdma_cm_event_timeout_ms != 0) {
6187 0 : drv_opts.rdma_cm_event_timeout_ms = opts->rdma_cm_event_timeout_ms;
6188 0 : }
6189 :
6190 0 : ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
6191 0 : if (ret) {
6192 0 : SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
6193 0 : return ret;
6194 : }
6195 0 : }
6196 :
6197 0 : g_opts = *opts;
6198 :
6199 0 : return 0;
6200 0 : }
6201 :
/* Request built by bdev_nvme_set_hotplug() and consumed (then freed) by
 * set_nvme_hotplug_period_cb().
 */
6202 : struct set_nvme_hotplug_ctx {
/* Poll period passed to the bdev_nvme_hotplug poller when hotplug is enabled. */
6203 : uint64_t period_us;
/* Requested hotplug state. */
6204 : bool enabled;
/* Optional callback (may be NULL) called with fn_ctx after the settings are applied. */
6205 : spdk_msg_fn fn;
6206 : void *fn_ctx;
6207 : };
6208 :
6209 : static void
6210 0 : set_nvme_hotplug_period_cb(void *_ctx)
6211 : {
6212 0 : struct set_nvme_hotplug_ctx *ctx = _ctx;
6213 :
6214 0 : spdk_poller_unregister(&g_hotplug_poller);
6215 0 : if (ctx->enabled) {
6216 0 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
6217 0 : } else {
6218 0 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
6219 : NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
6220 : }
6221 :
6222 0 : g_nvme_hotplug_poll_period_us = ctx->period_us;
6223 0 : g_nvme_hotplug_enabled = ctx->enabled;
6224 0 : if (ctx->fn) {
6225 0 : ctx->fn(ctx->fn_ctx);
6226 0 : }
6227 :
6228 0 : free(ctx);
6229 0 : }
6230 :
6231 : int
6232 0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
6233 : {
6234 : struct set_nvme_hotplug_ctx *ctx;
6235 :
6236 0 : if (enabled == true && !spdk_process_is_primary()) {
6237 0 : return -EPERM;
6238 : }
6239 :
6240 0 : ctx = calloc(1, sizeof(*ctx));
6241 0 : if (ctx == NULL) {
6242 0 : return -ENOMEM;
6243 : }
6244 :
6245 0 : period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
6246 0 : ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
6247 0 : ctx->enabled = enabled;
6248 0 : ctx->fn = cb;
6249 0 : ctx->fn_ctx = cb_ctx;
6250 :
6251 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
6252 0 : return 0;
6253 0 : }
6254 :
6255 : static void
6256 52 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
6257 : struct nvme_async_probe_ctx *ctx)
6258 : {
6259 : struct nvme_ns *nvme_ns;
6260 : struct nvme_bdev *nvme_bdev;
6261 : size_t j;
6262 :
6263 52 : assert(nvme_ctrlr != NULL);
6264 :
6265 52 : if (ctx->names == NULL) {
6266 0 : ctx->reported_bdevs = 0;
6267 0 : populate_namespaces_cb(ctx, 0);
6268 0 : return;
6269 : }
6270 :
6271 : /*
6272 : * Report the new bdevs that were created in this call.
6273 : * There can be more than one bdev per NVMe controller.
6274 : */
6275 52 : j = 0;
6276 52 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
6277 101 : while (nvme_ns != NULL) {
6278 49 : nvme_bdev = nvme_ns->bdev;
6279 49 : if (j < ctx->max_bdevs) {
6280 49 : ctx->names[j] = nvme_bdev->disk.name;
6281 49 : j++;
6282 49 : } else {
6283 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr,
6284 : "Maximum number of namespaces supported per NVMe controller is %du. "
6285 : "Unable to return all names of created bdevs\n",
6286 : ctx->max_bdevs);
6287 0 : ctx->reported_bdevs = 0;
6288 0 : populate_namespaces_cb(ctx, -ERANGE);
6289 0 : return;
6290 : }
6291 :
6292 49 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
6293 : }
6294 :
6295 52 : ctx->reported_bdevs = j;
6296 52 : populate_namespaces_cb(ctx, 0);
6297 52 : }
6298 :
6299 : static int
6300 9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
6301 : struct spdk_nvme_ctrlr *new_ctrlr,
6302 : struct spdk_nvme_transport_id *trid)
6303 : {
6304 : struct nvme_path_id *tmp_trid;
6305 :
6306 9 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
6307 0 : NVME_CTRLR_ERRLOG(nvme_ctrlr, "PCIe failover is not supported.\n");
6308 0 : return -ENOTSUP;
6309 : }
6310 :
6311 : /* Currently we only support failover to the same transport type. */
6312 9 : if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
6313 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr,
6314 : "Failover from trtype: %s to a different trtype: %s is not supported currently\n",
6315 : spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
6316 : spdk_nvme_transport_id_trtype_str(trid->trtype));
6317 0 : return -EINVAL;
6318 : }
6319 :
6320 :
6321 : /* Currently we only support failover to the same NQN. */
6322 9 : if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
6323 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr,
6324 : "Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
6325 : nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
6326 0 : return -EINVAL;
6327 : }
6328 :
6329 : /* Skip all the other checks if we've already registered this path. */
6330 21 : TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
6331 12 : if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
6332 0 : NVME_CTRLR_WARNLOG(nvme_ctrlr, "This path (traddr: %s subnqn: %s) is already registered\n",
6333 : trid->traddr, trid->subnqn);
6334 0 : return -EALREADY;
6335 : }
6336 12 : }
6337 :
6338 9 : return 0;
6339 9 : }
6340 :
6341 : static int
6342 9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
6343 : struct spdk_nvme_ctrlr *new_ctrlr)
6344 : {
6345 : struct nvme_ns *nvme_ns;
6346 : struct spdk_nvme_ns *new_ns;
6347 :
6348 9 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
6349 9 : while (nvme_ns != NULL) {
6350 0 : new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
6351 0 : assert(new_ns != NULL);
6352 :
6353 0 : if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
6354 0 : return -EINVAL;
6355 : }
6356 :
6357 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
6358 : }
6359 :
6360 9 : return 0;
6361 9 : }
6362 :
6363 : static int
6364 9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
6365 : struct spdk_nvme_transport_id *trid)
6366 : {
6367 : struct nvme_path_id *active_id, *new_trid, *tmp_trid;
6368 :
6369 9 : new_trid = calloc(1, sizeof(*new_trid));
6370 9 : if (new_trid == NULL) {
6371 0 : return -ENOMEM;
6372 : }
6373 9 : new_trid->trid = *trid;
6374 :
6375 9 : active_id = nvme_ctrlr->active_path_id;
6376 9 : assert(active_id != NULL);
6377 9 : assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
6378 :
6379 : /* Skip the active trid not to replace it until it is failed. */
6380 9 : tmp_trid = TAILQ_NEXT(active_id, link);
6381 9 : if (tmp_trid == NULL) {
6382 6 : goto add_tail;
6383 : }
6384 :
6385 : /* It means the trid is faled if its last failed time is non-zero.
6386 : * Insert the new alternate trid before any failed trid.
6387 : */
6388 5 : TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
6389 3 : if (tmp_trid->last_failed_tsc != 0) {
6390 1 : TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
6391 1 : return 0;
6392 : }
6393 4 : }
6394 :
6395 : add_tail:
6396 8 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
6397 8 : return 0;
6398 9 : }
6399 :
6400 : /* This is the case that a secondary path is added to an existing
6401 : * nvme_ctrlr for failover. After checking if it can access the same
6402 : * namespaces as the primary path, it is disconnected until failover occurs.
6403 : */
6404 : static int
6405 9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
6406 : struct spdk_nvme_ctrlr *new_ctrlr,
6407 : struct spdk_nvme_transport_id *trid)
6408 : {
6409 : int rc;
6410 :
6411 9 : assert(nvme_ctrlr != NULL);
6412 :
6413 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
6414 :
6415 9 : rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
6416 9 : if (rc != 0) {
6417 0 : goto exit;
6418 : }
6419 :
6420 9 : rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
6421 9 : if (rc != 0) {
6422 0 : goto exit;
6423 : }
6424 :
6425 9 : rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
6426 :
6427 : exit:
6428 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6429 :
6430 9 : spdk_nvme_detach(new_ctrlr);
6431 :
6432 9 : return rc;
6433 : }
6434 :
6435 : static void
6436 53 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6437 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
6438 : {
6439 53 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6440 : struct nvme_async_probe_ctx *ctx;
6441 : int rc;
6442 :
6443 53 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
6444 53 : ctx->ctrlr_attached = true;
6445 :
6446 53 : rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
6447 53 : if (rc != 0) {
6448 1 : ctx->reported_bdevs = 0;
6449 1 : populate_namespaces_cb(ctx, rc);
6450 1 : }
6451 53 : }
6452 :
6453 :
6454 : static void
6455 4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6456 : struct spdk_nvme_ctrlr *ctrlr,
6457 : const struct spdk_nvme_ctrlr_opts *opts)
6458 : {
6459 4 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6460 : struct nvme_ctrlr *nvme_ctrlr;
6461 : struct nvme_async_probe_ctx *ctx;
6462 : int rc;
6463 :
6464 4 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
6465 4 : ctx->ctrlr_attached = true;
6466 :
6467 4 : nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
6468 4 : if (nvme_ctrlr) {
6469 4 : rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
6470 4 : } else {
6471 0 : rc = -ENODEV;
6472 : }
6473 :
6474 4 : ctx->reported_bdevs = 0;
6475 4 : populate_namespaces_cb(ctx, rc);
6476 4 : }
6477 :
6478 : static int
6479 58 : bdev_nvme_async_poll(void *arg)
6480 : {
6481 58 : struct nvme_async_probe_ctx *ctx = arg;
6482 : int rc;
6483 :
6484 58 : rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
6485 58 : if (spdk_unlikely(rc != -EAGAIN)) {
6486 58 : ctx->probe_done = true;
6487 58 : spdk_poller_unregister(&ctx->poller);
6488 58 : if (!ctx->ctrlr_attached) {
6489 : /* The probe is done, but no controller was attached.
6490 : * That means we had a failure, so report -EIO back to
6491 : * the caller (usually the RPC). populate_namespaces_cb()
6492 : * will take care of freeing the nvme_async_probe_ctx.
6493 : */
6494 1 : ctx->reported_bdevs = 0;
6495 1 : populate_namespaces_cb(ctx, -EIO);
6496 58 : } else if (ctx->namespaces_populated) {
6497 : /* The namespaces for the attached controller were all
6498 : * populated and the response was already sent to the
6499 : * caller (usually the RPC). So free the context here.
6500 : */
6501 21 : free_nvme_async_probe_ctx(ctx);
6502 21 : }
6503 58 : }
6504 :
6505 58 : return SPDK_POLLER_BUSY;
6506 : }
6507 :
/* Validate a combination of error resiliency timeouts.
 *
 * Rules enforced (an error is logged for the first one violated):
 *  - ctrlr_loss_timeout_sec must be >= -1;
 *  - if ctrlr_loss_timeout_sec is 0, both other values must be 0;
 *  - otherwise reconnect_delay_sec must be non-zero and, when
 *    fast_io_fail_timeout_sec is non-zero, must not exceed it;
 *  - if ctrlr_loss_timeout_sec is positive, neither reconnect_delay_sec nor
 *    fast_io_fail_timeout_sec may exceed it.
 *
 * \return true if the combination is valid, false otherwise.
 */
static bool
bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
		uint32_t reconnect_delay_sec,
		uint32_t fast_io_fail_timeout_sec)
{
	if (ctrlr_loss_timeout_sec < -1) {
		SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
		return false;
	} else if (ctrlr_loss_timeout_sec == -1) {
		/* -1 is accepted; no upper bound is checked against it. */
		if (reconnect_delay_sec == 0) {
			SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
			return false;
		} else if (fast_io_fail_timeout_sec != 0 &&
			   fast_io_fail_timeout_sec < reconnect_delay_sec) {
			SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
			return false;
		}
	} else if (ctrlr_loss_timeout_sec != 0) {
		/* Positive here, so the casts to uint32_t below are value-preserving. */
		if (reconnect_delay_sec == 0) {
			SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
			return false;
		} else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
			SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
			return false;
		} else if (fast_io_fail_timeout_sec != 0) {
			if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
				SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
				return false;
			} else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
				SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
				return false;
			}
		}
	} else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
		SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
		return false;
	}

	return true;
}
6548 :
6549 : int
6550 58 : spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
6551 : const char *base_name,
6552 : const char **names,
6553 : uint32_t count,
6554 : spdk_bdev_nvme_create_cb cb_fn,
6555 : void *cb_ctx,
6556 : struct spdk_nvme_ctrlr_opts *drv_opts,
6557 : struct spdk_bdev_nvme_ctrlr_opts *bdev_opts)
6558 : {
6559 : struct nvme_probe_skip_entry *entry, *tmp;
6560 : struct nvme_async_probe_ctx *ctx;
6561 : spdk_nvme_attach_cb attach_cb;
6562 : struct nvme_ctrlr *nvme_ctrlr;
6563 : int len;
6564 :
6565 : /* TODO expand this check to include both the host and target TRIDs.
6566 : * Only if both are the same should we fail.
6567 : */
6568 58 : if (nvme_ctrlr_get(trid, drv_opts->hostnqn) != NULL) {
6569 0 : SPDK_ERRLOG("A controller with the provided trid (traddr: %s, hostnqn: %s) "
6570 : "already exists.\n", trid->traddr, drv_opts->hostnqn);
6571 0 : return -EEXIST;
6572 : }
6573 :
6574 58 : len = strnlen(base_name, SPDK_CONTROLLER_NAME_MAX);
6575 :
6576 58 : if (len == 0 || len == SPDK_CONTROLLER_NAME_MAX) {
6577 0 : SPDK_ERRLOG("controller name must be between 1 and %d characters\n", SPDK_CONTROLLER_NAME_MAX - 1);
6578 0 : return -EINVAL;
6579 : }
6580 :
6581 58 : if (bdev_opts != NULL &&
6582 116 : !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
6583 58 : bdev_opts->reconnect_delay_sec,
6584 58 : bdev_opts->fast_io_fail_timeout_sec)) {
6585 0 : return -EINVAL;
6586 : }
6587 :
6588 58 : ctx = calloc(1, sizeof(*ctx));
6589 58 : if (!ctx) {
6590 0 : return -ENOMEM;
6591 : }
6592 58 : ctx->base_name = base_name;
6593 58 : ctx->names = names;
6594 58 : ctx->max_bdevs = count;
6595 58 : ctx->cb_fn = cb_fn;
6596 58 : ctx->cb_ctx = cb_ctx;
6597 58 : ctx->trid = *trid;
6598 :
6599 58 : if (bdev_opts) {
6600 58 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
6601 58 : } else {
6602 0 : spdk_bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
6603 : }
6604 :
6605 58 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
6606 0 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
6607 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
6608 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
6609 0 : free(entry);
6610 0 : break;
6611 : }
6612 0 : }
6613 0 : }
6614 :
6615 58 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
6616 58 : ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
6617 58 : ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
6618 58 : ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
6619 58 : ctx->drv_opts.disable_read_ana_log_page = true;
6620 58 : ctx->drv_opts.transport_tos = g_opts.transport_tos;
6621 :
6622 58 : if (spdk_interrupt_mode_is_enabled()) {
6623 0 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
6624 0 : ctx->drv_opts.enable_interrupts = true;
6625 0 : } else {
6626 0 : SPDK_ERRLOG("Interrupt mode is only supported with PCIe transport\n");
6627 0 : free_nvme_async_probe_ctx(ctx);
6628 0 : return -ENOTSUP;
6629 : }
6630 0 : }
6631 :
6632 58 : if (ctx->bdev_opts.psk != NULL) {
6633 0 : ctx->drv_opts.tls_psk = spdk_keyring_get_key(ctx->bdev_opts.psk);
6634 0 : if (ctx->drv_opts.tls_psk == NULL) {
6635 0 : SPDK_ERRLOG("Could not load PSK: %s\n", ctx->bdev_opts.psk);
6636 0 : free_nvme_async_probe_ctx(ctx);
6637 0 : return -ENOKEY;
6638 : }
6639 0 : }
6640 :
6641 58 : if (ctx->bdev_opts.dhchap_key != NULL) {
6642 0 : ctx->drv_opts.dhchap_key = spdk_keyring_get_key(ctx->bdev_opts.dhchap_key);
6643 0 : if (ctx->drv_opts.dhchap_key == NULL) {
6644 0 : SPDK_ERRLOG("Could not load DH-HMAC-CHAP key: %s\n",
6645 : ctx->bdev_opts.dhchap_key);
6646 0 : free_nvme_async_probe_ctx(ctx);
6647 0 : return -ENOKEY;
6648 : }
6649 :
6650 0 : ctx->drv_opts.dhchap_digests = g_opts.dhchap_digests;
6651 0 : ctx->drv_opts.dhchap_dhgroups = g_opts.dhchap_dhgroups;
6652 0 : }
6653 58 : if (ctx->bdev_opts.dhchap_ctrlr_key != NULL) {
6654 0 : ctx->drv_opts.dhchap_ctrlr_key =
6655 0 : spdk_keyring_get_key(ctx->bdev_opts.dhchap_ctrlr_key);
6656 0 : if (ctx->drv_opts.dhchap_ctrlr_key == NULL) {
6657 0 : SPDK_ERRLOG("Could not load DH-HMAC-CHAP controller key: %s\n",
6658 : ctx->bdev_opts.dhchap_ctrlr_key);
6659 0 : free_nvme_async_probe_ctx(ctx);
6660 0 : return -ENOKEY;
6661 : }
6662 0 : }
6663 :
6664 58 : if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || ctx->bdev_opts.multipath) {
6665 54 : attach_cb = connect_attach_cb;
6666 54 : } else {
6667 4 : attach_cb = connect_set_failover_cb;
6668 : }
6669 :
6670 58 : nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
6671 58 : if (nvme_ctrlr && nvme_ctrlr->opts.multipath != ctx->bdev_opts.multipath) {
6672 : /* All controllers with the same name must be configured the same
6673 : * way, either for multipath or failover. If the configuration doesn't
6674 : * match - report error.
6675 : */
6676 0 : free_nvme_async_probe_ctx(ctx);
6677 0 : return -EINVAL;
6678 : }
6679 :
6680 58 : ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
6681 58 : if (ctx->probe_ctx == NULL) {
6682 0 : SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
6683 0 : free_nvme_async_probe_ctx(ctx);
6684 0 : return -ENODEV;
6685 : }
6686 58 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
6687 :
6688 58 : return 0;
6689 58 : }
6690 :
/* Context of a controller/path delete request; released with
 * free_bdev_nvme_delete_ctx().
 */
6691 : struct bdev_nvme_delete_ctx {
/* Heap-allocated string, freed together with the context. */
6692 : char *name;
/* NOTE(review): presumably the path filter the delete request applies to - confirm against the users of this struct. */
6693 : struct nvme_path_id path_id;
/* NOTE(review): presumably the completion callback and its argument - confirm. */
6694 : bdev_nvme_delete_done_fn delete_done;
6695 : void *delete_done_ctx;
6696 : uint64_t timeout_ticks;
6697 : struct spdk_poller *poller;
6698 : };
6699 :
6700 : static void
6701 2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
6702 : {
6703 2 : if (ctx != NULL) {
6704 1 : free(ctx->name);
6705 1 : free(ctx);
6706 1 : }
6707 2 : }
6708 :
6709 : static bool
6710 81 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
6711 : {
6712 81 : if (path_id->trid.trtype != 0) {
6713 21 : if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
6714 0 : if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
6715 0 : return false;
6716 : }
6717 0 : } else {
6718 21 : if (path_id->trid.trtype != p->trid.trtype) {
6719 0 : return false;
6720 : }
6721 : }
6722 21 : }
6723 :
6724 81 : if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
6725 21 : if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
6726 11 : return false;
6727 : }
6728 10 : }
6729 :
6730 70 : if (path_id->trid.adrfam != 0) {
6731 0 : if (path_id->trid.adrfam != p->trid.adrfam) {
6732 0 : return false;
6733 : }
6734 0 : }
6735 :
6736 70 : if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
6737 10 : if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
6738 0 : return false;
6739 : }
6740 10 : }
6741 :
6742 70 : if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
6743 10 : if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
6744 0 : return false;
6745 : }
6746 10 : }
6747 :
6748 70 : if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
6749 0 : if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
6750 0 : return false;
6751 : }
6752 0 : }
6753 :
6754 70 : if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
6755 0 : if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
6756 0 : return false;
6757 : }
6758 0 : }
6759 :
6760 70 : return true;
6761 81 : }
6762 :
6763 : static bool
6764 2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
6765 : {
6766 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6767 : struct nvme_ctrlr *ctrlr;
6768 : struct nvme_path_id *p;
6769 :
6770 2 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6771 2 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6772 2 : if (!nbdev_ctrlr) {
6773 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6774 1 : return false;
6775 : }
6776 :
6777 1 : TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
6778 1 : pthread_mutex_lock(&ctrlr->mutex);
6779 1 : TAILQ_FOREACH(p, &ctrlr->trids, link) {
6780 1 : if (nvme_path_id_compare(p, path_id)) {
6781 1 : pthread_mutex_unlock(&ctrlr->mutex);
6782 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6783 1 : return true;
6784 : }
6785 0 : }
6786 0 : pthread_mutex_unlock(&ctrlr->mutex);
6787 0 : }
6788 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6789 :
6790 0 : return false;
6791 2 : }
6792 :
6793 : static int
6794 2 : bdev_nvme_delete_complete_poll(void *arg)
6795 : {
6796 2 : struct bdev_nvme_delete_ctx *ctx = arg;
6797 2 : int rc = 0;
6798 :
6799 2 : if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
6800 1 : if (ctx->timeout_ticks > spdk_get_ticks()) {
6801 1 : return SPDK_POLLER_BUSY;
6802 : }
6803 :
6804 0 : SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
6805 0 : rc = -ETIMEDOUT;
6806 0 : }
6807 :
6808 1 : spdk_poller_unregister(&ctx->poller);
6809 :
6810 1 : ctx->delete_done(ctx->delete_done_ctx, rc);
6811 1 : free_bdev_nvme_delete_ctx(ctx);
6812 :
6813 1 : return SPDK_POLLER_BUSY;
6814 2 : }
6815 :
6816 : static int
6817 70 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
6818 : {
6819 : struct nvme_path_id *p, *t;
6820 : spdk_msg_fn msg_fn;
6821 70 : int rc = -ENXIO;
6822 :
6823 70 : pthread_mutex_lock(&nvme_ctrlr->mutex);
6824 :
6825 80 : TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
6826 80 : if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
6827 70 : break;
6828 : }
6829 :
6830 10 : if (!nvme_path_id_compare(p, path_id)) {
6831 3 : continue;
6832 : }
6833 :
6834 : /* We are not using the specified path. */
6835 7 : TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
6836 7 : free(p);
6837 7 : rc = 0;
6838 7 : }
6839 :
6840 70 : if (p == NULL || !nvme_path_id_compare(p, path_id)) {
6841 8 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6842 8 : return rc;
6843 : }
6844 :
6845 : /* If we made it here, then this path is a match! Now we need to remove it. */
6846 :
6847 : /* This is the active path in use right now. The active path is always the first in the list. */
6848 62 : assert(p == nvme_ctrlr->active_path_id);
6849 :
6850 62 : if (!TAILQ_NEXT(p, link)) {
6851 : /* The current path is the only path. */
6852 61 : msg_fn = _nvme_ctrlr_destruct;
6853 61 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
6854 61 : } else {
6855 : /* There is an alternative path. */
6856 1 : msg_fn = _bdev_nvme_reset_ctrlr;
6857 1 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
6858 : }
6859 :
6860 62 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6861 :
6862 62 : if (rc == 0) {
6863 62 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
6864 62 : } else if (rc == -EALREADY) {
6865 0 : rc = 0;
6866 0 : }
6867 :
6868 62 : return rc;
6869 70 : }
6870 :
6871 : int
6872 52 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
6873 : bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
6874 : {
6875 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6876 : struct nvme_ctrlr *nvme_ctrlr, *tmp_nvme_ctrlr;
6877 52 : struct bdev_nvme_delete_ctx *ctx = NULL;
6878 52 : int rc = -ENXIO, _rc;
6879 :
6880 52 : if (name == NULL || path_id == NULL) {
6881 0 : rc = -EINVAL;
6882 0 : goto exit;
6883 : }
6884 :
6885 52 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6886 :
6887 52 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6888 52 : if (nbdev_ctrlr == NULL) {
6889 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6890 :
6891 0 : SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
6892 0 : rc = -ENODEV;
6893 0 : goto exit;
6894 : }
6895 :
6896 122 : TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
6897 70 : _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
6898 70 : if (_rc < 0 && _rc != -ENXIO) {
6899 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6900 0 : rc = _rc;
6901 0 : goto exit;
6902 70 : } else if (_rc == 0) {
6903 : /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
6904 : * was deleted successfully. To remember the successful deletion,
6905 : * overwrite rc only if _rc is zero.
6906 : */
6907 64 : rc = 0;
6908 64 : }
6909 70 : }
6910 :
6911 52 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6912 :
6913 52 : if (rc != 0 || delete_done == NULL) {
6914 51 : goto exit;
6915 : }
6916 :
6917 1 : ctx = calloc(1, sizeof(*ctx));
6918 1 : if (ctx == NULL) {
6919 0 : SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
6920 0 : rc = -ENOMEM;
6921 0 : goto exit;
6922 : }
6923 :
6924 1 : ctx->name = strdup(name);
6925 1 : if (ctx->name == NULL) {
6926 0 : SPDK_ERRLOG("Failed to copy controller name for deletion\n");
6927 0 : rc = -ENOMEM;
6928 0 : goto exit;
6929 : }
6930 :
6931 1 : ctx->delete_done = delete_done;
6932 1 : ctx->delete_done_ctx = delete_done_ctx;
6933 1 : ctx->path_id = *path_id;
6934 1 : ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
6935 1 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
6936 1 : if (ctx->poller == NULL) {
6937 0 : SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
6938 0 : rc = -ENOMEM;
6939 0 : goto exit;
6940 : }
6941 :
6942 : exit:
6943 52 : if (rc != 0) {
6944 1 : free_bdev_nvme_delete_ctx(ctx);
6945 1 : }
6946 :
6947 52 : return rc;
6948 : }
6949 :
/* Logging helpers that prefix each message with the traddr:trsvcid of the
 * discovery service. 'ctx' must evaluate to a pointer to struct discovery_ctx;
 * it is parenthesized so that any pointer expression can be passed safely.
 */
#define DISCOVERY_INFOLOG(ctx, format, ...) \
	SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, (ctx)->trid.traddr, (ctx)->trid.trsvcid, ##__VA_ARGS__);

#define DISCOVERY_ERRLOG(ctx, format, ...) \
	SPDK_ERRLOG("Discovery[%s:%s] " format, (ctx)->trid.traddr, (ctx)->trid.trsvcid, ##__VA_ARGS__);
6955 :
6956 : struct discovery_entry_ctx {
6957 : char name[128];
6958 : struct spdk_nvme_transport_id trid;
6959 : struct spdk_nvme_ctrlr_opts drv_opts;
6960 : struct spdk_nvmf_discovery_log_page_entry entry;
6961 : TAILQ_ENTRY(discovery_entry_ctx) tailq;
6962 : struct discovery_ctx *ctx;
6963 : };
6964 :
6965 : struct discovery_ctx {
6966 : char *name;
6967 : spdk_bdev_nvme_start_discovery_fn start_cb_fn;
6968 : spdk_bdev_nvme_stop_discovery_fn stop_cb_fn;
6969 : void *cb_ctx;
6970 : struct spdk_nvme_probe_ctx *probe_ctx;
6971 : struct spdk_nvme_detach_ctx *detach_ctx;
6972 : struct spdk_nvme_ctrlr *ctrlr;
6973 : struct spdk_nvme_transport_id trid;
6974 : struct discovery_entry_ctx *entry_ctx_in_use;
6975 : struct spdk_poller *poller;
6976 : struct spdk_nvme_ctrlr_opts drv_opts;
6977 : struct spdk_bdev_nvme_ctrlr_opts bdev_opts;
6978 : struct spdk_nvmf_discovery_log_page *log_page;
6979 : TAILQ_ENTRY(discovery_ctx) tailq;
6980 : TAILQ_HEAD(, discovery_entry_ctx) nvm_entry_ctxs;
6981 : TAILQ_HEAD(, discovery_entry_ctx) discovery_entry_ctxs;
6982 : int rc;
6983 : bool wait_for_attach;
6984 : uint64_t timeout_ticks;
6985 : /* Denotes that the discovery service is being started. We're waiting
6986 : * for the initial connection to the discovery controller to be
6987 : * established and attach discovered NVM ctrlrs.
6988 : */
6989 : bool initializing;
6990 : /* Denotes if a discovery is currently in progress for this context.
6991 : * That includes connecting to newly discovered subsystems. Used to
6992 : * ensure we do not start a new discovery until an existing one is
6993 : * complete.
6994 : */
6995 : bool in_progress;
6996 :
6997 : /* Denotes if another discovery is needed after the one in progress
6998 : * completes. Set when we receive an AER completion while a discovery
6999 : * is already in progress.
7000 : */
7001 : bool pending;
7002 :
7003 : /* Signal to the discovery context poller that it should stop the
7004 : * discovery service, including detaching from the current discovery
7005 : * controller.
7006 : */
7007 : bool stop;
7008 :
7009 : struct spdk_thread *calling_thread;
7010 : uint32_t index;
7011 : uint32_t attach_in_progress;
7012 : char *hostnqn;
7013 :
7014 : /* Denotes if the discovery service was started by the mdns discovery.
7015 : */
7016 : bool from_mdns_discovery_service;
7017 : };
7018 :
7019 : TAILQ_HEAD(discovery_ctxs, discovery_ctx);
7020 : static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
7021 :
7022 : static void get_discovery_log_page(struct discovery_ctx *ctx);
7023 :
7024 : static void
7025 0 : free_discovery_ctx(struct discovery_ctx *ctx)
7026 : {
7027 0 : free(ctx->log_page);
7028 0 : free(ctx->hostnqn);
7029 0 : free(ctx->name);
7030 0 : free(ctx);
7031 0 : }
7032 :
7033 : static void
7034 0 : discovery_complete(struct discovery_ctx *ctx)
7035 : {
7036 0 : ctx->initializing = false;
7037 0 : ctx->in_progress = false;
7038 0 : if (ctx->pending) {
7039 0 : ctx->pending = false;
7040 0 : get_discovery_log_page(ctx);
7041 0 : }
7042 0 : }
7043 :
7044 : static void
7045 0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
7046 : struct spdk_nvmf_discovery_log_page_entry *entry)
7047 : {
7048 : char *space;
7049 :
7050 0 : trid->trtype = entry->trtype;
7051 0 : trid->adrfam = entry->adrfam;
7052 0 : memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
7053 0 : memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
7054 : /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
7055 : * before call to this function trid->subnqn is zeroed out, we need
7056 : * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
7057 : * remains 0. Then we can shorten the string (replace ' ' with 0) if required
7058 : */
7059 0 : memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
7060 :
7061 : /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
7062 : * But the log page entries typically pad them with spaces, not zeroes.
7063 : * So add a NULL terminator to each of these fields at the appropriate
7064 : * location.
7065 : */
7066 0 : space = strchr(trid->traddr, ' ');
7067 0 : if (space) {
7068 0 : *space = 0;
7069 0 : }
7070 0 : space = strchr(trid->trsvcid, ' ');
7071 0 : if (space) {
7072 0 : *space = 0;
7073 0 : }
7074 0 : space = strchr(trid->subnqn, ' ');
7075 0 : if (space) {
7076 0 : *space = 0;
7077 0 : }
7078 0 : }
7079 :
7080 : static void
7081 0 : _stop_discovery(void *_ctx)
7082 : {
7083 0 : struct discovery_ctx *ctx = _ctx;
7084 :
7085 0 : if (ctx->attach_in_progress > 0) {
7086 0 : spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
7087 0 : return;
7088 : }
7089 :
7090 0 : ctx->stop = true;
7091 :
7092 0 : while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
7093 : struct discovery_entry_ctx *entry_ctx;
7094 0 : struct nvme_path_id path = {};
7095 :
7096 0 : entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
7097 0 : path.trid = entry_ctx->trid;
7098 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
7099 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
7100 0 : free(entry_ctx);
7101 : }
7102 :
7103 0 : while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
7104 : struct discovery_entry_ctx *entry_ctx;
7105 :
7106 0 : entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
7107 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
7108 0 : free(entry_ctx);
7109 : }
7110 :
7111 0 : free(ctx->entry_ctx_in_use);
7112 0 : ctx->entry_ctx_in_use = NULL;
7113 0 : }
7114 :
7115 : static void
7116 0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
7117 : {
7118 0 : ctx->stop_cb_fn = cb_fn;
7119 0 : ctx->cb_ctx = cb_ctx;
7120 :
7121 0 : if (ctx->attach_in_progress > 0) {
7122 0 : DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
7123 : ctx->attach_in_progress);
7124 0 : }
7125 :
7126 0 : _stop_discovery(ctx);
7127 0 : }
7128 :
7129 : static void
7130 2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
7131 : {
7132 : struct discovery_ctx *d_ctx;
7133 : struct nvme_path_id *path_id;
7134 2 : struct spdk_nvme_transport_id trid = {};
7135 : struct discovery_entry_ctx *entry_ctx, *tmp;
7136 :
7137 2 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
7138 :
7139 2 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
7140 0 : TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
7141 0 : build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
7142 0 : if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
7143 0 : continue;
7144 : }
7145 :
7146 0 : TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
7147 0 : free(entry_ctx);
7148 0 : DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
7149 : trid.subnqn, trid.traddr, trid.trsvcid);
7150 :
7151 : /* Fail discovery ctrlr to force reattach attempt */
7152 0 : spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
7153 0 : }
7154 0 : }
7155 2 : }
7156 :
7157 : static void
7158 0 : discovery_remove_controllers(struct discovery_ctx *ctx)
7159 : {
7160 0 : struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
7161 : struct discovery_entry_ctx *entry_ctx, *tmp;
7162 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
7163 0 : struct spdk_nvme_transport_id old_trid = {};
7164 : uint64_t numrec, i;
7165 : bool found;
7166 :
7167 0 : numrec = from_le64(&log_page->numrec);
7168 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
7169 0 : found = false;
7170 0 : old_entry = &entry_ctx->entry;
7171 0 : build_trid_from_log_page_entry(&old_trid, old_entry);
7172 0 : for (i = 0; i < numrec; i++) {
7173 0 : new_entry = &log_page->entries[i];
7174 0 : if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
7175 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
7176 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
7177 0 : found = true;
7178 0 : break;
7179 : }
7180 0 : }
7181 0 : if (!found) {
7182 0 : struct nvme_path_id path = {};
7183 :
7184 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
7185 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
7186 :
7187 0 : path.trid = entry_ctx->trid;
7188 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
7189 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
7190 0 : free(entry_ctx);
7191 0 : }
7192 0 : }
7193 0 : free(log_page);
7194 0 : ctx->log_page = NULL;
7195 0 : discovery_complete(ctx);
7196 0 : }
7197 :
7198 : static void
7199 0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
7200 : {
7201 0 : ctx->timeout_ticks = 0;
7202 0 : ctx->rc = status;
7203 0 : if (ctx->start_cb_fn) {
7204 0 : ctx->start_cb_fn(ctx->cb_ctx, status);
7205 0 : ctx->start_cb_fn = NULL;
7206 0 : ctx->cb_ctx = NULL;
7207 0 : }
7208 0 : }
7209 :
7210 : static void
7211 0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
7212 : {
7213 0 : struct discovery_entry_ctx *entry_ctx = cb_ctx;
7214 0 : struct discovery_ctx *ctx = entry_ctx->ctx;
7215 :
7216 0 : DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
7217 0 : ctx->attach_in_progress--;
7218 0 : if (ctx->attach_in_progress == 0) {
7219 0 : complete_discovery_start(ctx, ctx->rc);
7220 0 : if (ctx->initializing && ctx->rc != 0) {
7221 0 : DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
7222 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
7223 0 : } else {
7224 0 : discovery_remove_controllers(ctx);
7225 : }
7226 0 : }
7227 0 : }
7228 :
7229 : static struct discovery_entry_ctx *
7230 0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
7231 : {
7232 : struct discovery_entry_ctx *new_ctx;
7233 :
7234 0 : new_ctx = calloc(1, sizeof(*new_ctx));
7235 0 : if (new_ctx == NULL) {
7236 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7237 0 : return NULL;
7238 : }
7239 :
7240 0 : new_ctx->ctx = ctx;
7241 0 : memcpy(&new_ctx->trid, trid, sizeof(*trid));
7242 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
7243 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
7244 0 : return new_ctx;
7245 0 : }
7246 :
7247 : static void
7248 0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
7249 : struct spdk_nvmf_discovery_log_page *log_page)
7250 : {
7251 0 : struct discovery_ctx *ctx = cb_arg;
7252 : struct discovery_entry_ctx *entry_ctx, *tmp;
7253 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
7254 : uint64_t numrec, i;
7255 : bool found;
7256 :
7257 0 : if (rc || spdk_nvme_cpl_is_error(cpl)) {
7258 0 : DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
7259 0 : return;
7260 : }
7261 :
7262 0 : ctx->log_page = log_page;
7263 0 : assert(ctx->attach_in_progress == 0);
7264 0 : numrec = from_le64(&log_page->numrec);
7265 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
7266 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
7267 0 : free(entry_ctx);
7268 0 : }
7269 0 : for (i = 0; i < numrec; i++) {
7270 0 : found = false;
7271 0 : new_entry = &log_page->entries[i];
7272 0 : if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
7273 0 : new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
7274 : struct discovery_entry_ctx *new_ctx;
7275 0 : struct spdk_nvme_transport_id trid = {};
7276 :
7277 0 : build_trid_from_log_page_entry(&trid, new_entry);
7278 0 : new_ctx = create_discovery_entry_ctx(ctx, &trid);
7279 0 : if (new_ctx == NULL) {
7280 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7281 0 : break;
7282 : }
7283 :
7284 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
7285 0 : continue;
7286 : }
7287 0 : TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
7288 0 : old_entry = &entry_ctx->entry;
7289 0 : if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
7290 0 : found = true;
7291 0 : break;
7292 : }
7293 0 : }
7294 0 : if (!found) {
7295 0 : struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
7296 : struct discovery_ctx *d_ctx;
7297 :
7298 0 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
7299 0 : TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
7300 0 : if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
7301 : sizeof(new_entry->subnqn))) {
7302 0 : break;
7303 : }
7304 0 : }
7305 0 : if (subnqn_ctx) {
7306 0 : break;
7307 : }
7308 0 : }
7309 :
7310 0 : new_ctx = calloc(1, sizeof(*new_ctx));
7311 0 : if (new_ctx == NULL) {
7312 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7313 0 : break;
7314 : }
7315 :
7316 0 : new_ctx->ctx = ctx;
7317 0 : memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
7318 0 : build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
7319 0 : if (subnqn_ctx) {
7320 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
7321 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
7322 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
7323 : new_ctx->name);
7324 0 : } else {
7325 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
7326 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
7327 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
7328 : new_ctx->name);
7329 : }
7330 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
7331 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
7332 0 : rc = spdk_bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
7333 0 : discovery_attach_controller_done, new_ctx,
7334 0 : &new_ctx->drv_opts, &ctx->bdev_opts);
7335 0 : if (rc == 0) {
7336 0 : TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
7337 0 : ctx->attach_in_progress++;
7338 0 : } else {
7339 0 : DISCOVERY_ERRLOG(ctx, "spdk_bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
7340 : }
7341 0 : }
7342 0 : }
7343 :
7344 0 : if (ctx->attach_in_progress == 0) {
7345 0 : discovery_remove_controllers(ctx);
7346 0 : }
7347 0 : }
7348 :
7349 : static void
7350 0 : get_discovery_log_page(struct discovery_ctx *ctx)
7351 : {
7352 : int rc;
7353 :
7354 0 : assert(ctx->in_progress == false);
7355 0 : ctx->in_progress = true;
7356 0 : rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
7357 0 : if (rc != 0) {
7358 0 : DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
7359 0 : }
7360 0 : DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
7361 0 : }
7362 :
7363 : static void
7364 0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
7365 : {
7366 0 : struct discovery_ctx *ctx = arg;
7367 0 : uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
7368 :
7369 0 : if (spdk_nvme_cpl_is_error(cpl)) {
7370 0 : DISCOVERY_ERRLOG(ctx, "aer failed\n");
7371 0 : return;
7372 : }
7373 :
7374 0 : if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
7375 0 : DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
7376 0 : return;
7377 : }
7378 :
7379 0 : DISCOVERY_INFOLOG(ctx, "got aer\n");
7380 0 : if (ctx->in_progress) {
7381 0 : ctx->pending = true;
7382 0 : return;
7383 : }
7384 :
7385 0 : get_discovery_log_page(ctx);
7386 0 : }
7387 :
7388 : static void
7389 0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
7390 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
7391 : {
7392 0 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
7393 : struct discovery_ctx *ctx;
7394 :
7395 0 : ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
7396 :
7397 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
7398 0 : ctx->probe_ctx = NULL;
7399 0 : ctx->ctrlr = ctrlr;
7400 :
7401 0 : if (ctx->rc != 0) {
7402 0 : DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
7403 : ctx->rc);
7404 0 : return;
7405 : }
7406 :
7407 0 : spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
7408 0 : }
7409 :
7410 : static int
7411 0 : discovery_poller(void *arg)
7412 : {
7413 0 : struct discovery_ctx *ctx = arg;
7414 : struct spdk_nvme_transport_id *trid;
7415 : int rc;
7416 :
7417 0 : if (ctx->detach_ctx) {
7418 0 : rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
7419 0 : if (rc != -EAGAIN) {
7420 0 : ctx->detach_ctx = NULL;
7421 0 : ctx->ctrlr = NULL;
7422 0 : }
7423 0 : } else if (ctx->stop) {
7424 0 : if (ctx->ctrlr != NULL) {
7425 0 : rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
7426 0 : if (rc == 0) {
7427 0 : return SPDK_POLLER_BUSY;
7428 : }
7429 0 : DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
7430 0 : }
7431 0 : spdk_poller_unregister(&ctx->poller);
7432 0 : TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
7433 0 : assert(ctx->start_cb_fn == NULL);
7434 0 : if (ctx->stop_cb_fn != NULL) {
7435 0 : ctx->stop_cb_fn(ctx->cb_ctx);
7436 0 : }
7437 0 : free_discovery_ctx(ctx);
7438 0 : } else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
7439 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
7440 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
7441 0 : assert(ctx->initializing);
7442 0 : spdk_poller_unregister(&ctx->poller);
7443 0 : TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
7444 0 : complete_discovery_start(ctx, -ETIMEDOUT);
7445 0 : stop_discovery(ctx, NULL, NULL);
7446 0 : free_discovery_ctx(ctx);
7447 0 : return SPDK_POLLER_BUSY;
7448 : }
7449 :
7450 0 : assert(ctx->entry_ctx_in_use == NULL);
7451 0 : ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
7452 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
7453 0 : trid = &ctx->entry_ctx_in_use->trid;
7454 :
7455 : /* All controllers must be configured explicitely either for multipath or failover.
7456 : * While discovery use multipath mode, we need to set this in bdev options as well.
7457 : */
7458 0 : ctx->bdev_opts.multipath = true;
7459 :
7460 0 : ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
7461 0 : if (ctx->probe_ctx) {
7462 0 : spdk_poller_unregister(&ctx->poller);
7463 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
7464 0 : } else {
7465 0 : DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
7466 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
7467 0 : ctx->entry_ctx_in_use = NULL;
7468 : }
7469 0 : } else if (ctx->probe_ctx) {
7470 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
7471 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
7472 0 : complete_discovery_start(ctx, -ETIMEDOUT);
7473 0 : return SPDK_POLLER_BUSY;
7474 : }
7475 :
7476 0 : rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
7477 0 : if (rc != -EAGAIN) {
7478 0 : if (ctx->rc != 0) {
7479 0 : assert(ctx->initializing);
7480 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
7481 0 : } else {
7482 0 : assert(rc == 0);
7483 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
7484 0 : ctx->rc = rc;
7485 0 : get_discovery_log_page(ctx);
7486 : }
7487 0 : }
7488 0 : } else {
7489 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
7490 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
7491 0 : complete_discovery_start(ctx, -ETIMEDOUT);
7492 : /* We need to wait until all NVM ctrlrs are attached before we stop the
7493 : * discovery service to make sure we don't detach a ctrlr that is still
7494 : * being attached.
7495 : */
7496 0 : if (ctx->attach_in_progress == 0) {
7497 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
7498 0 : return SPDK_POLLER_BUSY;
7499 : }
7500 0 : }
7501 :
7502 0 : rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
7503 0 : if (rc < 0) {
7504 0 : spdk_poller_unregister(&ctx->poller);
7505 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
7506 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
7507 0 : ctx->entry_ctx_in_use = NULL;
7508 :
7509 0 : rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
7510 0 : if (rc != 0) {
7511 0 : DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
7512 0 : ctx->ctrlr = NULL;
7513 0 : }
7514 0 : }
7515 : }
7516 :
7517 0 : return SPDK_POLLER_BUSY;
7518 0 : }
7519 :
7520 : static void
7521 0 : start_discovery_poller(void *arg)
7522 : {
7523 0 : struct discovery_ctx *ctx = arg;
7524 :
7525 0 : TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
7526 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
7527 0 : }
7528 :
7529 : int
7530 0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
7531 : const char *base_name,
7532 : struct spdk_nvme_ctrlr_opts *drv_opts,
7533 : struct spdk_bdev_nvme_ctrlr_opts *bdev_opts,
7534 : uint64_t attach_timeout,
7535 : bool from_mdns,
7536 : spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
7537 : {
7538 : struct discovery_ctx *ctx;
7539 : struct discovery_entry_ctx *discovery_entry_ctx;
7540 :
7541 0 : snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
7542 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7543 0 : if (strcmp(ctx->name, base_name) == 0) {
7544 0 : return -EEXIST;
7545 : }
7546 :
7547 0 : if (ctx->entry_ctx_in_use != NULL) {
7548 0 : if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
7549 0 : return -EEXIST;
7550 : }
7551 0 : }
7552 :
7553 0 : TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
7554 0 : if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
7555 0 : return -EEXIST;
7556 : }
7557 0 : }
7558 0 : }
7559 :
7560 0 : ctx = calloc(1, sizeof(*ctx));
7561 0 : if (ctx == NULL) {
7562 0 : return -ENOMEM;
7563 : }
7564 :
7565 0 : ctx->name = strdup(base_name);
7566 0 : if (ctx->name == NULL) {
7567 0 : free_discovery_ctx(ctx);
7568 0 : return -ENOMEM;
7569 : }
7570 0 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
7571 0 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
7572 0 : ctx->from_mdns_discovery_service = from_mdns;
7573 0 : ctx->bdev_opts.from_discovery_service = true;
7574 0 : ctx->calling_thread = spdk_get_thread();
7575 0 : ctx->start_cb_fn = cb_fn;
7576 0 : ctx->cb_ctx = cb_ctx;
7577 0 : ctx->initializing = true;
7578 0 : if (ctx->start_cb_fn) {
7579 : /* We can use this when dumping json to denote if this RPC parameter
7580 : * was specified or not.
7581 : */
7582 0 : ctx->wait_for_attach = true;
7583 0 : }
7584 0 : if (attach_timeout != 0) {
7585 0 : ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
7586 0 : spdk_get_ticks_hz() / 1000ull;
7587 0 : }
7588 0 : TAILQ_INIT(&ctx->nvm_entry_ctxs);
7589 0 : TAILQ_INIT(&ctx->discovery_entry_ctxs);
7590 0 : memcpy(&ctx->trid, trid, sizeof(*trid));
7591 : /* Even if user did not specify hostnqn, we can still strdup("\0"); */
7592 0 : ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
7593 0 : if (ctx->hostnqn == NULL) {
7594 0 : free_discovery_ctx(ctx);
7595 0 : return -ENOMEM;
7596 : }
7597 0 : discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
7598 0 : if (discovery_entry_ctx == NULL) {
7599 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7600 0 : free_discovery_ctx(ctx);
7601 0 : return -ENOMEM;
7602 : }
7603 :
7604 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
7605 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
7606 0 : return 0;
7607 0 : }
7608 :
7609 : int
7610 0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
7611 : {
7612 : struct discovery_ctx *ctx;
7613 :
7614 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7615 0 : if (strcmp(name, ctx->name) == 0) {
7616 0 : if (ctx->stop) {
7617 0 : return -EALREADY;
7618 : }
7619 : /* If we're still starting the discovery service and ->rc is non-zero, we're
7620 : * going to stop it as soon as we can
7621 : */
7622 0 : if (ctx->initializing && ctx->rc != 0) {
7623 0 : return -EALREADY;
7624 : }
7625 0 : stop_discovery(ctx, cb_fn, cb_ctx);
7626 0 : return 0;
7627 : }
7628 0 : }
7629 :
7630 0 : return -ENOENT;
7631 0 : }
7632 :
7633 : static int
7634 1 : bdev_nvme_library_init(void)
7635 : {
7636 1 : g_bdev_nvme_init_thread = spdk_get_thread();
7637 :
7638 1 : spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
7639 : bdev_nvme_destroy_poll_group_cb,
7640 : sizeof(struct nvme_poll_group), "nvme_poll_groups");
7641 :
7642 1 : return 0;
7643 : }
7644 :
7645 : static void
7646 1 : bdev_nvme_fini_destruct_ctrlrs(void)
7647 : {
7648 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
7649 : struct nvme_ctrlr *nvme_ctrlr;
7650 :
7651 1 : pthread_mutex_lock(&g_bdev_nvme_mutex);
7652 1 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
7653 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
7654 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
7655 0 : if (nvme_ctrlr->destruct) {
7656 : /* This controller's destruction was already started
7657 : * before the application started shutting down
7658 : */
7659 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7660 0 : continue;
7661 : }
7662 0 : nvme_ctrlr->destruct = true;
7663 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7664 :
7665 0 : spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
7666 0 : nvme_ctrlr);
7667 0 : }
7668 0 : }
7669 :
7670 1 : g_bdev_nvme_module_finish = true;
7671 1 : if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
7672 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7673 1 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
7674 1 : spdk_bdev_module_fini_done();
7675 1 : return;
7676 : }
7677 :
7678 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7679 1 : }
7680 :
7681 : static void
7682 0 : check_discovery_fini(void *arg)
7683 : {
7684 0 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7685 0 : bdev_nvme_fini_destruct_ctrlrs();
7686 0 : }
7687 0 : }
7688 :
7689 : static void
7690 1 : bdev_nvme_library_fini(void)
7691 : {
7692 : struct nvme_probe_skip_entry *entry, *entry_tmp;
7693 : struct discovery_ctx *ctx;
7694 :
7695 1 : spdk_poller_unregister(&g_hotplug_poller);
7696 1 : free(g_hotplug_probe_ctx);
7697 1 : g_hotplug_probe_ctx = NULL;
7698 :
7699 1 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
7700 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
7701 0 : free(entry);
7702 0 : }
7703 :
7704 1 : assert(spdk_get_thread() == g_bdev_nvme_init_thread);
7705 1 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7706 1 : bdev_nvme_fini_destruct_ctrlrs();
7707 1 : } else {
7708 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7709 0 : stop_discovery(ctx, check_discovery_fini, NULL);
7710 0 : }
7711 : }
7712 1 : }
7713 :
7714 : static void
7715 0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
7716 : {
7717 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7718 0 : struct spdk_bdev *bdev = bdev_io->bdev;
7719 : struct spdk_dif_ctx dif_ctx;
7720 0 : struct spdk_dif_error err_blk = {};
7721 : int rc;
7722 : struct spdk_dif_ctx_init_ext_opts dif_opts;
7723 :
7724 0 : dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
7725 0 : dif_opts.dif_pi_format = bdev->dif_pi_format;
7726 0 : rc = spdk_dif_ctx_init(&dif_ctx,
7727 0 : bdev->blocklen, bdev->md_len, bdev->md_interleave,
7728 0 : bdev->dif_is_head_of_md, bdev->dif_type,
7729 0 : bdev_io->u.bdev.dif_check_flags,
7730 0 : bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
7731 0 : if (rc != 0) {
7732 0 : SPDK_ERRLOG("Initialization of DIF context failed\n");
7733 0 : return;
7734 : }
7735 :
7736 0 : if (bdev->md_interleave) {
7737 0 : rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7738 0 : bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7739 0 : } else {
7740 0 : struct iovec md_iov = {
7741 0 : .iov_base = bdev_io->u.bdev.md_buf,
7742 0 : .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
7743 : };
7744 :
7745 0 : rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7746 0 : &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7747 : }
7748 :
7749 0 : if (rc != 0) {
7750 0 : SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
7751 : err_blk.err_type, err_blk.err_offset);
7752 0 : } else {
7753 0 : SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
7754 : }
7755 0 : }
7756 :
7757 : static void
7758 0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7759 : {
7760 0 : struct nvme_bdev_io *bio = ref;
7761 :
7762 0 : if (spdk_nvme_cpl_is_success(cpl)) {
7763 : /* Run PI verification for read data buffer. */
7764 0 : bdev_nvme_verify_pi_error(bio);
7765 0 : }
7766 :
7767 : /* Return original completion status */
7768 0 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7769 0 : }
7770 :
7771 : static void
7772 3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7773 : {
7774 3 : struct nvme_bdev_io *bio = ref;
7775 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7776 : int ret;
7777 :
7778 3 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7779 0 : SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
7780 : cpl->status.sct, cpl->status.sc);
7781 :
7782 : /* Save completion status to use after verifying PI error. */
7783 0 : bio->cpl = *cpl;
7784 :
7785 0 : if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
7786 : /* Read without PI checking to verify PI error. */
7787 0 : ret = bdev_nvme_no_pi_readv(bio,
7788 0 : bdev_io->u.bdev.iovs,
7789 0 : bdev_io->u.bdev.iovcnt,
7790 0 : bdev_io->u.bdev.md_buf,
7791 0 : bdev_io->u.bdev.num_blocks,
7792 0 : bdev_io->u.bdev.offset_blocks);
7793 0 : if (ret == 0) {
7794 0 : return;
7795 : }
7796 0 : }
7797 0 : }
7798 :
7799 3 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7800 3 : }
7801 :
7802 : static void
7803 25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7804 : {
7805 25 : struct nvme_bdev_io *bio = ref;
7806 :
7807 25 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7808 0 : SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
7809 : cpl->status.sct, cpl->status.sc);
7810 : /* Run PI verification for write data buffer if PI error is detected. */
7811 0 : bdev_nvme_verify_pi_error(bio);
7812 0 : }
7813 :
7814 25 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7815 25 : }
7816 :
7817 : static void
7818 0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7819 : {
7820 0 : struct nvme_bdev_io *bio = ref;
7821 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7822 :
7823 : /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
7824 : * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
7825 : */
7826 0 : bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
7827 :
7828 0 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7829 0 : SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
7830 : cpl->status.sct, cpl->status.sc);
7831 : /* Run PI verification for zone append data buffer if PI error is detected. */
7832 0 : bdev_nvme_verify_pi_error(bio);
7833 0 : }
7834 :
7835 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7836 0 : }
7837 :
7838 : static void
7839 1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7840 : {
7841 1 : struct nvme_bdev_io *bio = ref;
7842 :
7843 1 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7844 0 : SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
7845 : cpl->status.sct, cpl->status.sc);
7846 : /* Run PI verification for compare data buffer if PI error is detected. */
7847 0 : bdev_nvme_verify_pi_error(bio);
7848 0 : }
7849 :
7850 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7851 1 : }
7852 :
7853 : static void
7854 4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7855 : {
7856 4 : struct nvme_bdev_io *bio = ref;
7857 :
7858 : /* Compare operation completion */
7859 4 : if (!bio->first_fused_completed) {
7860 : /* Save compare result for write callback */
7861 2 : bio->cpl = *cpl;
7862 2 : bio->first_fused_completed = true;
7863 2 : return;
7864 : }
7865 :
7866 : /* Write operation completion */
7867 2 : if (spdk_nvme_cpl_is_error(&bio->cpl)) {
7868 : /* If bio->cpl is already an error, it means the compare operation failed. In that case,
7869 : * complete the IO with the compare operation's status.
7870 : */
7871 1 : if (!spdk_nvme_cpl_is_error(cpl)) {
7872 1 : SPDK_ERRLOG("Unexpected write success after compare failure.\n");
7873 1 : }
7874 :
7875 1 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7876 1 : } else {
7877 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7878 : }
7879 4 : }
7880 :
/* Generic completion callback: complete the bdev I/O behind `ref` with `cpl`. */
static void
bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	bdev_nvme_io_complete_nvme_status((struct nvme_bdev_io *)ref, cpl);
}
7888 :
7889 : static int
7890 0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
7891 : {
7892 0 : switch (desc->zt) {
7893 : case SPDK_NVME_ZONE_TYPE_SEQWR:
7894 0 : info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
7895 0 : break;
7896 : default:
7897 0 : SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
7898 0 : return -EIO;
7899 : }
7900 :
7901 0 : switch (desc->zs) {
7902 : case SPDK_NVME_ZONE_STATE_EMPTY:
7903 0 : info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
7904 0 : break;
7905 : case SPDK_NVME_ZONE_STATE_IOPEN:
7906 0 : info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
7907 0 : break;
7908 : case SPDK_NVME_ZONE_STATE_EOPEN:
7909 0 : info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
7910 0 : break;
7911 : case SPDK_NVME_ZONE_STATE_CLOSED:
7912 0 : info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
7913 0 : break;
7914 : case SPDK_NVME_ZONE_STATE_RONLY:
7915 0 : info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
7916 0 : break;
7917 : case SPDK_NVME_ZONE_STATE_FULL:
7918 0 : info->state = SPDK_BDEV_ZONE_STATE_FULL;
7919 0 : break;
7920 : case SPDK_NVME_ZONE_STATE_OFFLINE:
7921 0 : info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
7922 0 : break;
7923 : default:
7924 0 : SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
7925 0 : return -EIO;
7926 : }
7927 :
7928 0 : info->zone_id = desc->zslba;
7929 0 : info->write_pointer = desc->wp;
7930 0 : info->capacity = desc->zcap;
7931 :
7932 0 : return 0;
7933 0 : }
7934 :
7935 : static void
7936 0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
7937 : {
7938 0 : struct nvme_bdev_io *bio = ref;
7939 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7940 0 : uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
7941 0 : uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
7942 0 : struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
7943 : uint64_t max_zones_per_buf, i;
7944 : uint32_t zone_report_bufsize;
7945 : struct spdk_nvme_ns *ns;
7946 : struct spdk_nvme_qpair *qpair;
7947 : int ret;
7948 :
7949 0 : if (spdk_nvme_cpl_is_error(cpl)) {
7950 0 : goto out_complete_io_nvme_cpl;
7951 : }
7952 :
7953 0 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
7954 0 : ret = -ENXIO;
7955 0 : goto out_complete_io_ret;
7956 : }
7957 :
7958 0 : ns = bio->io_path->nvme_ns->ns;
7959 0 : qpair = bio->io_path->qpair->qpair;
7960 :
7961 0 : zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
7962 0 : max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
7963 : sizeof(bio->zone_report_buf->descs[0]);
7964 :
7965 0 : if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
7966 0 : ret = -EINVAL;
7967 0 : goto out_complete_io_ret;
7968 : }
7969 :
7970 0 : if (!bio->zone_report_buf->nr_zones) {
7971 0 : ret = -EINVAL;
7972 0 : goto out_complete_io_ret;
7973 : }
7974 :
7975 0 : for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
7976 0 : ret = fill_zone_from_report(&info[bio->handled_zones],
7977 0 : &bio->zone_report_buf->descs[i]);
7978 0 : if (ret) {
7979 0 : goto out_complete_io_ret;
7980 : }
7981 0 : bio->handled_zones++;
7982 0 : }
7983 :
7984 0 : if (bio->handled_zones < zones_to_copy) {
7985 0 : uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
7986 0 : uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
7987 :
7988 0 : memset(bio->zone_report_buf, 0, zone_report_bufsize);
7989 0 : ret = spdk_nvme_zns_report_zones(ns, qpair,
7990 0 : bio->zone_report_buf, zone_report_bufsize,
7991 0 : slba, SPDK_NVME_ZRA_LIST_ALL, true,
7992 0 : bdev_nvme_get_zone_info_done, bio);
7993 0 : if (!ret) {
7994 0 : return;
7995 : } else {
7996 0 : goto out_complete_io_ret;
7997 : }
7998 : }
7999 :
8000 : out_complete_io_nvme_cpl:
8001 0 : free(bio->zone_report_buf);
8002 0 : bio->zone_report_buf = NULL;
8003 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
8004 0 : return;
8005 :
8006 : out_complete_io_ret:
8007 0 : free(bio->zone_report_buf);
8008 0 : bio->zone_report_buf = NULL;
8009 0 : bdev_nvme_io_complete(bio, ret);
8010 0 : }
8011 :
/* Zone management completion callback: complete the bdev I/O behind `ref` with `cpl`. */
static void
bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	bdev_nvme_io_complete_nvme_status((struct nvme_bdev_io *)ref, cpl);
}
8019 :
8020 : static void
8021 4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
8022 : {
8023 4 : struct nvme_bdev_io *bio = ctx;
8024 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
8025 4 : const struct spdk_nvme_cpl *cpl = &bio->cpl;
8026 :
8027 4 : assert(bdev_nvme_io_type_is_admin(bdev_io->type));
8028 :
8029 4 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
8030 4 : }
8031 :
8032 : static void
8033 3 : bdev_nvme_abort_complete(void *ctx)
8034 : {
8035 3 : struct nvme_bdev_io *bio = ctx;
8036 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
8037 :
8038 3 : if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
8039 3 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
8040 3 : } else {
8041 0 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
8042 : }
8043 3 : }
8044 :
8045 : static void
8046 3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
8047 : {
8048 3 : struct nvme_bdev_io *bio = ref;
8049 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
8050 :
8051 3 : bio->cpl = *cpl;
8052 3 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
8053 3 : }
8054 :
8055 : static void
8056 4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
8057 : {
8058 4 : struct nvme_bdev_io *bio = ref;
8059 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
8060 :
8061 4 : bio->cpl = *cpl;
8062 8 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
8063 4 : bdev_nvme_admin_passthru_complete_nvme_status, bio);
8064 4 : }
8065 :
8066 : static void
8067 0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
8068 : {
8069 0 : struct nvme_bdev_io *bio = ref;
8070 : struct iovec *iov;
8071 :
8072 0 : bio->iov_offset = sgl_offset;
8073 0 : for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
8074 0 : iov = &bio->iovs[bio->iovpos];
8075 0 : if (bio->iov_offset < iov->iov_len) {
8076 0 : break;
8077 : }
8078 :
8079 0 : bio->iov_offset -= iov->iov_len;
8080 0 : }
8081 0 : }
8082 :
8083 : static int
8084 0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
8085 : {
8086 0 : struct nvme_bdev_io *bio = ref;
8087 : struct iovec *iov;
8088 :
8089 0 : assert(bio->iovpos < bio->iovcnt);
8090 :
8091 0 : iov = &bio->iovs[bio->iovpos];
8092 :
8093 0 : *address = iov->iov_base;
8094 0 : *length = iov->iov_len;
8095 :
8096 0 : if (bio->iov_offset) {
8097 0 : assert(bio->iov_offset <= iov->iov_len);
8098 0 : *address += bio->iov_offset;
8099 0 : *length -= bio->iov_offset;
8100 0 : }
8101 :
8102 0 : bio->iov_offset += *length;
8103 0 : if (bio->iov_offset == iov->iov_len) {
8104 0 : bio->iovpos++;
8105 0 : bio->iov_offset = 0;
8106 0 : }
8107 :
8108 0 : return 0;
8109 : }
8110 :
8111 : static void
8112 0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
8113 : {
8114 0 : struct nvme_bdev_io *bio = ref;
8115 : struct iovec *iov;
8116 :
8117 0 : bio->fused_iov_offset = sgl_offset;
8118 0 : for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
8119 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
8120 0 : if (bio->fused_iov_offset < iov->iov_len) {
8121 0 : break;
8122 : }
8123 :
8124 0 : bio->fused_iov_offset -= iov->iov_len;
8125 0 : }
8126 0 : }
8127 :
8128 : static int
8129 0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
8130 : {
8131 0 : struct nvme_bdev_io *bio = ref;
8132 : struct iovec *iov;
8133 :
8134 0 : assert(bio->fused_iovpos < bio->fused_iovcnt);
8135 :
8136 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
8137 :
8138 0 : *address = iov->iov_base;
8139 0 : *length = iov->iov_len;
8140 :
8141 0 : if (bio->fused_iov_offset) {
8142 0 : assert(bio->fused_iov_offset <= iov->iov_len);
8143 0 : *address += bio->fused_iov_offset;
8144 0 : *length -= bio->fused_iov_offset;
8145 0 : }
8146 :
8147 0 : bio->fused_iov_offset += *length;
8148 0 : if (bio->fused_iov_offset == iov->iov_len) {
8149 0 : bio->fused_iovpos++;
8150 0 : bio->fused_iov_offset = 0;
8151 0 : }
8152 :
8153 0 : return 0;
8154 : }
8155 :
8156 : static int
8157 0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8158 : void *md, uint64_t lba_count, uint64_t lba)
8159 : {
8160 : int rc;
8161 :
8162 0 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
8163 : lba_count, lba);
8164 :
8165 0 : bio->iovs = iov;
8166 0 : bio->iovcnt = iovcnt;
8167 0 : bio->iovpos = 0;
8168 0 : bio->iov_offset = 0;
8169 :
8170 0 : rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
8171 0 : bio->io_path->qpair->qpair,
8172 0 : lba, lba_count,
8173 0 : bdev_nvme_no_pi_readv_done, bio, 0,
8174 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
8175 0 : md, 0, 0);
8176 :
8177 0 : if (rc != 0 && rc != -ENOMEM) {
8178 0 : SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
8179 0 : }
8180 0 : return rc;
8181 : }
8182 :
8183 : static int
8184 3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8185 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
8186 : struct spdk_memory_domain *domain, void *domain_ctx,
8187 : struct spdk_accel_sequence *seq)
8188 : {
8189 3 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8190 3 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8191 : int rc;
8192 :
8193 3 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
8194 : lba_count, lba);
8195 :
8196 3 : bio->iovs = iov;
8197 3 : bio->iovcnt = iovcnt;
8198 3 : bio->iovpos = 0;
8199 3 : bio->iov_offset = 0;
8200 :
8201 3 : if (domain != NULL || seq != NULL) {
8202 1 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
8203 1 : bio->ext_opts.memory_domain = domain;
8204 1 : bio->ext_opts.memory_domain_ctx = domain_ctx;
8205 1 : bio->ext_opts.io_flags = flags;
8206 1 : bio->ext_opts.metadata = md;
8207 1 : bio->ext_opts.accel_sequence = seq;
8208 :
8209 1 : if (iovcnt == 1) {
8210 2 : rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
8211 1 : bio, &bio->ext_opts);
8212 1 : } else {
8213 0 : rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
8214 0 : bdev_nvme_readv_done, bio,
8215 : bdev_nvme_queued_reset_sgl,
8216 : bdev_nvme_queued_next_sge,
8217 0 : &bio->ext_opts);
8218 : }
8219 3 : } else if (iovcnt == 1) {
8220 4 : rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
8221 2 : md, lba, lba_count, bdev_nvme_readv_done,
8222 2 : bio, flags, 0, 0);
8223 2 : } else {
8224 0 : rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
8225 0 : bdev_nvme_readv_done, bio, flags,
8226 : bdev_nvme_queued_reset_sgl,
8227 0 : bdev_nvme_queued_next_sge, md, 0, 0);
8228 : }
8229 :
8230 3 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
8231 0 : SPDK_ERRLOG("readv failed: rc = %d\n", rc);
8232 0 : }
8233 3 : return rc;
8234 : }
8235 :
8236 : static int
8237 25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8238 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
8239 : struct spdk_memory_domain *domain, void *domain_ctx,
8240 : struct spdk_accel_sequence *seq,
8241 : union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13)
8242 : {
8243 25 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8244 25 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8245 : int rc;
8246 :
8247 25 : SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
8248 : lba_count, lba);
8249 :
8250 25 : bio->iovs = iov;
8251 25 : bio->iovcnt = iovcnt;
8252 25 : bio->iovpos = 0;
8253 25 : bio->iov_offset = 0;
8254 :
8255 25 : if (domain != NULL || seq != NULL) {
8256 0 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
8257 0 : bio->ext_opts.memory_domain = domain;
8258 0 : bio->ext_opts.memory_domain_ctx = domain_ctx;
8259 0 : bio->ext_opts.io_flags = flags | SPDK_NVME_IO_FLAGS_DIRECTIVE(cdw12.write.dtype);
8260 0 : bio->ext_opts.cdw13 = cdw13.raw;
8261 0 : bio->ext_opts.metadata = md;
8262 0 : bio->ext_opts.accel_sequence = seq;
8263 :
8264 0 : if (iovcnt == 1) {
8265 0 : rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
8266 0 : bio, &bio->ext_opts);
8267 0 : } else {
8268 0 : rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
8269 0 : bdev_nvme_writev_done, bio,
8270 : bdev_nvme_queued_reset_sgl,
8271 : bdev_nvme_queued_next_sge,
8272 0 : &bio->ext_opts);
8273 : }
8274 25 : } else if (iovcnt == 1) {
8275 50 : rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
8276 25 : md, lba, lba_count, bdev_nvme_writev_done,
8277 25 : bio, flags, 0, 0);
8278 25 : } else {
8279 0 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
8280 0 : bdev_nvme_writev_done, bio, flags,
8281 : bdev_nvme_queued_reset_sgl,
8282 0 : bdev_nvme_queued_next_sge, md, 0, 0);
8283 : }
8284 :
8285 25 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
8286 0 : SPDK_ERRLOG("writev failed: rc = %d\n", rc);
8287 0 : }
8288 25 : return rc;
8289 : }
8290 :
8291 : static int
8292 0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8293 : void *md, uint64_t lba_count, uint64_t zslba,
8294 : uint32_t flags)
8295 : {
8296 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8297 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8298 : int rc;
8299 :
8300 0 : SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
8301 : lba_count, zslba);
8302 :
8303 0 : bio->iovs = iov;
8304 0 : bio->iovcnt = iovcnt;
8305 0 : bio->iovpos = 0;
8306 0 : bio->iov_offset = 0;
8307 :
8308 0 : if (iovcnt == 1) {
8309 0 : rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
8310 0 : lba_count,
8311 0 : bdev_nvme_zone_appendv_done, bio,
8312 0 : flags,
8313 : 0, 0);
8314 0 : } else {
8315 0 : rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
8316 0 : bdev_nvme_zone_appendv_done, bio, flags,
8317 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
8318 0 : md, 0, 0);
8319 : }
8320 :
8321 0 : if (rc != 0 && rc != -ENOMEM) {
8322 0 : SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
8323 0 : }
8324 0 : return rc;
8325 : }
8326 :
8327 : static int
8328 1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
8329 : void *md, uint64_t lba_count, uint64_t lba,
8330 : uint32_t flags)
8331 : {
8332 : int rc;
8333 :
8334 1 : SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
8335 : lba_count, lba);
8336 :
8337 1 : bio->iovs = iov;
8338 1 : bio->iovcnt = iovcnt;
8339 1 : bio->iovpos = 0;
8340 1 : bio->iov_offset = 0;
8341 :
8342 2 : rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
8343 1 : bio->io_path->qpair->qpair,
8344 1 : lba, lba_count,
8345 1 : bdev_nvme_comparev_done, bio, flags,
8346 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
8347 1 : md, 0, 0);
8348 :
8349 1 : if (rc != 0 && rc != -ENOMEM) {
8350 0 : SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
8351 0 : }
8352 1 : return rc;
8353 : }
8354 :
8355 : static int
8356 2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
8357 : struct iovec *write_iov, int write_iovcnt,
8358 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
8359 : {
8360 2 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8361 2 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8362 2 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
8363 : int rc;
8364 :
8365 2 : SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
8366 : lba_count, lba);
8367 :
8368 2 : bio->iovs = cmp_iov;
8369 2 : bio->iovcnt = cmp_iovcnt;
8370 2 : bio->iovpos = 0;
8371 2 : bio->iov_offset = 0;
8372 2 : bio->fused_iovs = write_iov;
8373 2 : bio->fused_iovcnt = write_iovcnt;
8374 2 : bio->fused_iovpos = 0;
8375 2 : bio->fused_iov_offset = 0;
8376 :
8377 2 : if (bdev_io->num_retries == 0) {
8378 2 : bio->first_fused_submitted = false;
8379 2 : bio->first_fused_completed = false;
8380 2 : }
8381 :
8382 2 : if (!bio->first_fused_submitted) {
8383 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
8384 2 : memset(&bio->cpl, 0, sizeof(bio->cpl));
8385 :
8386 4 : rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
8387 2 : bdev_nvme_comparev_and_writev_done, bio, flags,
8388 2 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
8389 2 : if (rc == 0) {
8390 2 : bio->first_fused_submitted = true;
8391 2 : flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
8392 2 : } else {
8393 0 : if (rc != -ENOMEM) {
8394 0 : SPDK_ERRLOG("compare failed: rc = %d\n", rc);
8395 0 : }
8396 0 : return rc;
8397 : }
8398 2 : }
8399 :
8400 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
8401 :
8402 4 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
8403 2 : bdev_nvme_comparev_and_writev_done, bio, flags,
8404 2 : bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
8405 2 : if (rc != 0 && rc != -ENOMEM) {
8406 0 : SPDK_ERRLOG("write failed: rc = %d\n", rc);
8407 0 : rc = 0;
8408 0 : }
8409 :
8410 2 : return rc;
8411 2 : }
8412 :
8413 : static int
8414 1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
8415 : {
8416 : struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
8417 : struct spdk_nvme_dsm_range *range;
8418 : uint64_t offset, remaining;
8419 : uint64_t num_ranges_u64;
8420 : uint16_t num_ranges;
8421 : int rc;
8422 :
8423 1 : num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
8424 : SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8425 1 : if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
8426 0 : SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
8427 0 : return -EINVAL;
8428 : }
8429 1 : num_ranges = (uint16_t)num_ranges_u64;
8430 :
8431 1 : offset = offset_blocks;
8432 1 : remaining = num_blocks;
8433 1 : range = &dsm_ranges[0];
8434 :
8435 : /* Fill max-size ranges until the remaining blocks fit into one range */
8436 1 : while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
8437 0 : range->attributes.raw = 0;
8438 0 : range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8439 0 : range->starting_lba = offset;
8440 :
8441 0 : offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8442 0 : remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8443 0 : range++;
8444 : }
8445 :
8446 : /* Final range describes the remaining blocks */
8447 1 : range->attributes.raw = 0;
8448 1 : range->length = remaining;
8449 1 : range->starting_lba = offset;
8450 :
8451 2 : rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
8452 1 : bio->io_path->qpair->qpair,
8453 : SPDK_NVME_DSM_ATTR_DEALLOCATE,
8454 1 : dsm_ranges, num_ranges,
8455 1 : bdev_nvme_queued_done, bio);
8456 :
8457 1 : return rc;
8458 1 : }
8459 :
8460 : static int
8461 0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
8462 : {
8463 0 : if (num_blocks > UINT16_MAX + 1) {
8464 0 : SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
8465 0 : return -EINVAL;
8466 : }
8467 :
8468 0 : return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
8469 0 : bio->io_path->qpair->qpair,
8470 0 : offset_blocks, num_blocks,
8471 0 : bdev_nvme_queued_done, bio,
8472 : 0);
8473 0 : }
8474 :
8475 : static int
8476 0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
8477 : struct spdk_bdev_zone_info *info)
8478 : {
8479 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8480 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8481 0 : uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
8482 0 : uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
8483 0 : uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
8484 :
8485 0 : if (zone_id % zone_size != 0) {
8486 0 : return -EINVAL;
8487 : }
8488 :
8489 0 : if (num_zones > total_zones || !num_zones) {
8490 0 : return -EINVAL;
8491 : }
8492 :
8493 0 : assert(!bio->zone_report_buf);
8494 0 : bio->zone_report_buf = calloc(1, zone_report_bufsize);
8495 0 : if (!bio->zone_report_buf) {
8496 0 : return -ENOMEM;
8497 : }
8498 :
8499 0 : bio->handled_zones = 0;
8500 :
8501 0 : return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
8502 0 : zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
8503 0 : bdev_nvme_get_zone_info_done, bio);
8504 0 : }
8505 :
8506 : static int
8507 0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
8508 : enum spdk_bdev_zone_action action)
8509 : {
8510 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8511 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8512 :
8513 0 : switch (action) {
8514 : case SPDK_BDEV_ZONE_CLOSE:
8515 0 : return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
8516 0 : bdev_nvme_zone_management_done, bio);
8517 : case SPDK_BDEV_ZONE_FINISH:
8518 0 : return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
8519 0 : bdev_nvme_zone_management_done, bio);
8520 : case SPDK_BDEV_ZONE_OPEN:
8521 0 : return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
8522 0 : bdev_nvme_zone_management_done, bio);
8523 : case SPDK_BDEV_ZONE_RESET:
8524 0 : return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
8525 0 : bdev_nvme_zone_management_done, bio);
8526 : case SPDK_BDEV_ZONE_OFFLINE:
8527 0 : return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
8528 0 : bdev_nvme_zone_management_done, bio);
8529 : default:
8530 0 : return -EINVAL;
8531 : }
8532 0 : }
8533 :
8534 : static void
8535 5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
8536 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
8537 : {
8538 : struct nvme_io_path *io_path;
8539 : struct nvme_ctrlr *nvme_ctrlr;
8540 : uint32_t max_xfer_size;
8541 5 : int rc = -ENXIO;
8542 :
8543 : /* Choose the first ctrlr which is not failed. */
8544 8 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
8545 7 : nvme_ctrlr = io_path->qpair->ctrlr;
8546 :
8547 : /* We should skip any unavailable nvme_ctrlr rather than checking
8548 : * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
8549 : */
8550 7 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
8551 3 : continue;
8552 : }
8553 :
8554 4 : max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
8555 :
8556 4 : if (nbytes > max_xfer_size) {
8557 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8558 0 : rc = -EINVAL;
8559 0 : goto err;
8560 : }
8561 :
8562 8 : rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
8563 4 : bdev_nvme_admin_passthru_done, bio);
8564 4 : if (rc == 0) {
8565 4 : return;
8566 : }
8567 1 : }
8568 :
8569 : err:
8570 1 : bdev_nvme_admin_complete(bio, rc);
8571 5 : }
8572 :
8573 : static int
8574 0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
8575 : void *buf, size_t nbytes)
8576 : {
8577 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8578 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8579 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8580 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8581 :
8582 0 : if (nbytes > max_xfer_size) {
8583 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8584 0 : return -EINVAL;
8585 : }
8586 :
8587 : /*
8588 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
8589 : * so fill it out automatically.
8590 : */
8591 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8592 :
8593 0 : return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
8594 0 : (uint32_t)nbytes, bdev_nvme_queued_done, bio);
8595 0 : }
8596 :
8597 : static int
8598 0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
8599 : void *buf, size_t nbytes, void *md_buf, size_t md_len)
8600 : {
8601 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8602 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8603 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
8604 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8605 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8606 :
8607 0 : if (nbytes > max_xfer_size) {
8608 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8609 0 : return -EINVAL;
8610 : }
8611 :
8612 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
8613 0 : SPDK_ERRLOG("invalid meta data buffer size\n");
8614 0 : return -EINVAL;
8615 : }
8616 :
8617 : /*
8618 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
8619 : * so fill it out automatically.
8620 : */
8621 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8622 :
8623 0 : return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
8624 0 : (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
8625 0 : }
8626 :
8627 : static int
8628 0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
8629 : struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
8630 : size_t nbytes, void *md_buf, size_t md_len)
8631 : {
8632 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8633 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8634 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
8635 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8636 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8637 :
8638 0 : bio->iovs = iov;
8639 0 : bio->iovcnt = iovcnt;
8640 0 : bio->iovpos = 0;
8641 0 : bio->iov_offset = 0;
8642 :
8643 0 : if (nbytes > max_xfer_size) {
8644 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8645 0 : return -EINVAL;
8646 : }
8647 :
8648 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
8649 0 : SPDK_ERRLOG("invalid meta data buffer size\n");
8650 0 : return -EINVAL;
8651 : }
8652 :
8653 : /*
8654 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
8655 : * require a nsid, so fill it out automatically.
8656 : */
8657 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8658 :
8659 0 : return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
8660 0 : ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
8661 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
8662 0 : }
8663 :
8664 : static void
8665 6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
8666 : struct nvme_bdev_io *bio_to_abort)
8667 : {
8668 : struct nvme_io_path *io_path;
8669 6 : int rc = 0;
8670 :
8671 6 : rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
8672 6 : if (rc == 0) {
8673 1 : bdev_nvme_admin_complete(bio, 0);
8674 1 : return;
8675 : }
8676 :
8677 5 : io_path = bio_to_abort->io_path;
8678 5 : if (io_path != NULL) {
8679 6 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8680 3 : io_path->qpair->qpair,
8681 3 : bio_to_abort,
8682 3 : bdev_nvme_abort_done, bio);
8683 3 : } else {
8684 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
8685 4 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8686 : NULL,
8687 2 : bio_to_abort,
8688 2 : bdev_nvme_abort_done, bio);
8689 :
8690 2 : if (rc != -ENOENT) {
8691 1 : break;
8692 : }
8693 1 : }
8694 : }
8695 :
8696 5 : if (rc != 0) {
8697 : /* If no command was found or there was any error, complete the abort
8698 : * request with failure.
8699 : */
8700 2 : bdev_nvme_admin_complete(bio, rc);
8701 2 : }
8702 6 : }
8703 :
8704 : static int
8705 0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
8706 : uint64_t num_blocks)
8707 : {
8708 0 : struct spdk_nvme_scc_source_range range = {
8709 0 : .slba = src_offset_blocks,
8710 0 : .nlb = num_blocks - 1
8711 : };
8712 :
8713 0 : return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
8714 0 : bio->io_path->qpair->qpair,
8715 0 : &range, 1, dst_offset_blocks,
8716 0 : bdev_nvme_queued_done, bio);
8717 : }
8718 :
8719 : static void
8720 0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
8721 : {
8722 : const char *action;
8723 : uint32_t i;
8724 :
8725 0 : if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
8726 0 : action = "reset";
8727 0 : } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
8728 0 : action = "abort";
8729 0 : } else {
8730 0 : action = "none";
8731 : }
8732 :
8733 0 : spdk_json_write_object_begin(w);
8734 :
8735 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
8736 :
8737 0 : spdk_json_write_named_object_begin(w, "params");
8738 0 : spdk_json_write_named_string(w, "action_on_timeout", action);
8739 0 : spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
8740 0 : spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
8741 0 : spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
8742 0 : spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
8743 0 : spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
8744 0 : spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
8745 0 : spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
8746 0 : spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
8747 0 : spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
8748 0 : spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
8749 0 : spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
8750 0 : spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
8751 0 : spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
8752 0 : spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
8753 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
8754 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
8755 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
8756 0 : spdk_json_write_named_bool(w, "disable_auto_failback", g_opts.disable_auto_failback);
8757 0 : spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
8758 0 : spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
8759 0 : spdk_json_write_named_bool(w, "nvme_error_stat", g_opts.nvme_error_stat);
8760 0 : spdk_json_write_named_uint32(w, "rdma_srq_size", g_opts.rdma_srq_size);
8761 0 : spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
8762 0 : spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
8763 0 : spdk_json_write_named_uint32(w, "rdma_max_cq_size", g_opts.rdma_max_cq_size);
8764 0 : spdk_json_write_named_uint16(w, "rdma_cm_event_timeout_ms", g_opts.rdma_cm_event_timeout_ms);
8765 0 : spdk_json_write_named_array_begin(w, "dhchap_digests");
8766 0 : for (i = 0; i < 32; ++i) {
8767 0 : if (g_opts.dhchap_digests & SPDK_BIT(i)) {
8768 0 : spdk_json_write_string(w, spdk_nvme_dhchap_get_digest_name(i));
8769 0 : }
8770 0 : }
8771 0 : spdk_json_write_array_end(w);
8772 0 : spdk_json_write_named_array_begin(w, "dhchap_dhgroups");
8773 0 : for (i = 0; i < 32; ++i) {
8774 0 : if (g_opts.dhchap_dhgroups & SPDK_BIT(i)) {
8775 0 : spdk_json_write_string(w, spdk_nvme_dhchap_get_dhgroup_name(i));
8776 0 : }
8777 0 : }
8778 :
8779 0 : spdk_json_write_array_end(w);
8780 0 : spdk_json_write_object_end(w);
8781 :
8782 0 : spdk_json_write_object_end(w);
8783 0 : }
8784 :
8785 : static void
8786 0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
8787 : {
8788 : struct spdk_nvme_transport_id trid;
8789 :
8790 0 : spdk_json_write_object_begin(w);
8791 :
8792 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
8793 :
8794 0 : spdk_json_write_named_object_begin(w, "params");
8795 0 : spdk_json_write_named_string(w, "name", ctx->name);
8796 0 : spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
8797 :
8798 0 : trid = ctx->trid;
8799 0 : memset(trid.subnqn, 0, sizeof(trid.subnqn));
8800 0 : nvme_bdev_dump_trid_json(&trid, w);
8801 :
8802 0 : spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
8803 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
8804 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
8805 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8806 0 : ctx->bdev_opts.fast_io_fail_timeout_sec);
8807 0 : spdk_json_write_object_end(w);
8808 :
8809 0 : spdk_json_write_object_end(w);
8810 0 : }
8811 :
#ifdef SPDK_CONFIG_NVME_CUSE
/*
 * Emit a "bdev_nvme_cuse_register" RPC object for a controller.
 *
 * Nothing is written when spdk_nvme_cuse_get_ctrlr_name() fails for the
 * controller.
 */
static void
nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
			    struct nvme_ctrlr *nvme_ctrlr)
{
	/* Fixed-size buffer. Sizing the array from a non-constant size_t variable
	 * would make it a variable-length array, which is not intended here.
	 */
	char cuse_name[128];
	size_t cuse_name_size = sizeof(cuse_name);

	/* Only the return code is used; the name itself is not emitted. */
	if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
					  cuse_name, &cuse_name_size) != 0) {
		return;
	}

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
#endif
8836 :
8837 : static void
8838 0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
8839 : struct nvme_ctrlr *nvme_ctrlr,
8840 : struct nvme_path_id *path_id)
8841 : {
8842 : struct spdk_nvme_transport_id *trid;
8843 : const struct spdk_nvme_ctrlr_opts *opts;
8844 :
8845 0 : if (nvme_ctrlr->opts.from_discovery_service) {
8846 : /* Do not emit an RPC for this - it will be implicitly
8847 : * covered by a separate bdev_nvme_start_discovery or
8848 : * bdev_nvme_start_mdns_discovery RPC.
8849 : */
8850 0 : return;
8851 : }
8852 :
8853 0 : trid = &path_id->trid;
8854 :
8855 0 : spdk_json_write_object_begin(w);
8856 :
8857 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
8858 :
8859 0 : spdk_json_write_named_object_begin(w, "params");
8860 0 : spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
8861 0 : nvme_bdev_dump_trid_json(trid, w);
8862 0 : spdk_json_write_named_bool(w, "prchk_reftag",
8863 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
8864 0 : spdk_json_write_named_bool(w, "prchk_guard",
8865 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
8866 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
8867 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
8868 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8869 0 : nvme_ctrlr->opts.fast_io_fail_timeout_sec);
8870 0 : if (nvme_ctrlr->psk != NULL) {
8871 0 : spdk_json_write_named_string(w, "psk", spdk_key_get_name(nvme_ctrlr->psk));
8872 0 : }
8873 0 : if (nvme_ctrlr->dhchap_key != NULL) {
8874 0 : spdk_json_write_named_string(w, "dhchap_key",
8875 0 : spdk_key_get_name(nvme_ctrlr->dhchap_key));
8876 0 : }
8877 0 : if (nvme_ctrlr->dhchap_ctrlr_key != NULL) {
8878 0 : spdk_json_write_named_string(w, "dhchap_ctrlr_key",
8879 0 : spdk_key_get_name(nvme_ctrlr->dhchap_ctrlr_key));
8880 0 : }
8881 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
8882 0 : spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
8883 0 : spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
8884 0 : spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
8885 0 : if (opts->src_addr[0] != '\0') {
8886 0 : spdk_json_write_named_string(w, "hostaddr", opts->src_addr);
8887 0 : }
8888 0 : if (opts->src_svcid[0] != '\0') {
8889 0 : spdk_json_write_named_string(w, "hostsvcid", opts->src_svcid);
8890 0 : }
8891 :
8892 0 : if (nvme_ctrlr->opts.multipath) {
8893 0 : spdk_json_write_named_string(w, "multipath", "multipath");
8894 0 : }
8895 0 : spdk_json_write_object_end(w);
8896 :
8897 0 : spdk_json_write_object_end(w);
8898 0 : }
8899 :
8900 : static void
8901 0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
8902 : {
8903 0 : spdk_json_write_object_begin(w);
8904 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
8905 :
8906 0 : spdk_json_write_named_object_begin(w, "params");
8907 0 : spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
8908 0 : spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
8909 0 : spdk_json_write_object_end(w);
8910 :
8911 0 : spdk_json_write_object_end(w);
8912 0 : }
8913 :
8914 : static int
8915 0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
8916 : {
8917 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
8918 : struct nvme_ctrlr *nvme_ctrlr;
8919 : struct discovery_ctx *ctx;
8920 : struct nvme_path_id *path_id;
8921 :
8922 0 : bdev_nvme_opts_config_json(w);
8923 :
8924 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
8925 :
8926 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
8927 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
8928 0 : path_id = nvme_ctrlr->active_path_id;
8929 0 : assert(path_id == TAILQ_FIRST(&nvme_ctrlr->trids));
8930 0 : nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
8931 :
8932 0 : path_id = TAILQ_NEXT(path_id, link);
8933 0 : while (path_id != NULL) {
8934 0 : nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
8935 0 : path_id = TAILQ_NEXT(path_id, link);
8936 : }
8937 :
8938 : #ifdef SPDK_CONFIG_NVME_CUSE
8939 : nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
8940 : #endif
8941 0 : }
8942 0 : }
8943 :
8944 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
8945 0 : if (!ctx->from_mdns_discovery_service) {
8946 0 : bdev_nvme_discovery_config_json(w, ctx);
8947 0 : }
8948 0 : }
8949 :
8950 0 : bdev_nvme_mdns_discovery_config_json(w);
8951 :
8952 : /* Dump as last parameter to give all NVMe bdevs chance to be constructed
8953 : * before enabling hotplug poller.
8954 : */
8955 0 : bdev_nvme_hotplug_config_json(w);
8956 :
8957 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
8958 0 : return 0;
8959 : }
8960 :
8961 : struct spdk_nvme_ctrlr *
8962 1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
8963 : {
8964 : struct nvme_bdev *nbdev;
8965 : struct nvme_ns *nvme_ns;
8966 :
8967 1 : if (!bdev || bdev->module != &nvme_if) {
8968 0 : return NULL;
8969 : }
8970 :
8971 1 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
8972 1 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
8973 1 : assert(nvme_ns != NULL);
8974 :
8975 1 : return nvme_ns->ctrlr->ctrlr;
8976 1 : }
8977 :
8978 : static bool
8979 12 : nvme_io_path_is_current(struct nvme_io_path *io_path)
8980 : {
8981 : const struct nvme_bdev_channel *nbdev_ch;
8982 : bool current;
8983 :
8984 12 : if (!nvme_io_path_is_available(io_path)) {
8985 4 : return false;
8986 : }
8987 :
8988 8 : nbdev_ch = io_path->nbdev_ch;
8989 8 : if (nbdev_ch == NULL) {
8990 1 : current = false;
8991 8 : } else if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
8992 3 : struct nvme_io_path *optimized_io_path = NULL;
8993 :
8994 6 : STAILQ_FOREACH(optimized_io_path, &nbdev_ch->io_path_list, stailq) {
8995 5 : if (optimized_io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) {
8996 2 : break;
8997 : }
8998 3 : }
8999 :
9000 : /* A non-optimized path is only current if there are no optimized paths. */
9001 3 : current = (io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) ||
9002 2 : (optimized_io_path == NULL);
9003 3 : } else {
9004 4 : if (nbdev_ch->current_io_path) {
9005 1 : current = (io_path == nbdev_ch->current_io_path);
9006 1 : } else {
9007 : struct nvme_io_path *first_path;
9008 :
9009 : /* We arrived here as there are no optimized paths for active-passive
9010 : * mode. Check if this io_path is the first one available on the list.
9011 : */
9012 3 : current = false;
9013 3 : STAILQ_FOREACH(first_path, &nbdev_ch->io_path_list, stailq) {
9014 3 : if (nvme_io_path_is_available(first_path)) {
9015 3 : current = (io_path == first_path);
9016 3 : break;
9017 : }
9018 0 : }
9019 : }
9020 : }
9021 :
9022 8 : return current;
9023 12 : }
9024 :
9025 : static struct nvme_ctrlr *
9026 0 : bdev_nvme_next_ctrlr_unsafe(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct nvme_ctrlr *prev)
9027 : {
9028 : struct nvme_ctrlr *next;
9029 :
9030 : /* Must be called under g_bdev_nvme_mutex */
9031 0 : next = prev != NULL ? TAILQ_NEXT(prev, tailq) : TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
9032 0 : while (next != NULL) {
9033 : /* ref can be 0 when the ctrlr was released, but hasn't been detached yet */
9034 0 : pthread_mutex_lock(&next->mutex);
9035 0 : if (next->ref > 0) {
9036 0 : next->ref++;
9037 0 : pthread_mutex_unlock(&next->mutex);
9038 0 : return next;
9039 : }
9040 :
9041 0 : pthread_mutex_unlock(&next->mutex);
9042 0 : next = TAILQ_NEXT(next, tailq);
9043 : }
9044 :
9045 0 : return NULL;
9046 0 : }
9047 :
9048 : struct bdev_nvme_set_keys_ctx {
9049 : struct nvme_ctrlr *nctrlr;
9050 : struct spdk_key *dhchap_key;
9051 : struct spdk_key *dhchap_ctrlr_key;
9052 : struct spdk_thread *thread;
9053 : bdev_nvme_set_keys_cb cb_fn;
9054 : void *cb_ctx;
9055 : int status;
9056 : };
9057 :
9058 : static void
9059 0 : bdev_nvme_free_set_keys_ctx(struct bdev_nvme_set_keys_ctx *ctx)
9060 : {
9061 0 : if (ctx == NULL) {
9062 0 : return;
9063 : }
9064 :
9065 0 : spdk_keyring_put_key(ctx->dhchap_key);
9066 0 : spdk_keyring_put_key(ctx->dhchap_ctrlr_key);
9067 0 : free(ctx);
9068 0 : }
9069 :
9070 : static void
9071 0 : _bdev_nvme_set_keys_done(void *_ctx)
9072 : {
9073 0 : struct bdev_nvme_set_keys_ctx *ctx = _ctx;
9074 :
9075 0 : ctx->cb_fn(ctx->cb_ctx, ctx->status);
9076 :
9077 0 : if (ctx->nctrlr != NULL) {
9078 0 : nvme_ctrlr_put_ref(ctx->nctrlr);
9079 0 : }
9080 0 : bdev_nvme_free_set_keys_ctx(ctx);
9081 0 : }
9082 :
9083 : static void
9084 0 : bdev_nvme_set_keys_done(struct bdev_nvme_set_keys_ctx *ctx, int status)
9085 : {
9086 0 : ctx->status = status;
9087 0 : spdk_thread_exec_msg(ctx->thread, _bdev_nvme_set_keys_done, ctx);
9088 0 : }
9089 :
9090 : static void bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx);
9091 :
9092 : static void
9093 0 : bdev_nvme_authenticate_ctrlr_continue(struct bdev_nvme_set_keys_ctx *ctx)
9094 : {
9095 : struct nvme_ctrlr *next;
9096 :
9097 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
9098 0 : next = bdev_nvme_next_ctrlr_unsafe(NULL, ctx->nctrlr);
9099 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
9100 :
9101 0 : nvme_ctrlr_put_ref(ctx->nctrlr);
9102 0 : ctx->nctrlr = next;
9103 :
9104 0 : if (next == NULL) {
9105 0 : bdev_nvme_set_keys_done(ctx, 0);
9106 0 : } else {
9107 0 : bdev_nvme_authenticate_ctrlr(ctx);
9108 : }
9109 0 : }
9110 :
/*
 * Completion of the per-channel qpair authentication pass for one controller:
 * a non-zero status ends the whole set-keys operation with that status,
 * success moves on to the next controller.
 */
static void
bdev_nvme_authenticate_qpairs_done(struct spdk_io_channel_iter *i, int status)
{
	struct bdev_nvme_set_keys_ctx *keys_ctx = spdk_io_channel_iter_get_ctx(i);

	if (status == 0) {
		bdev_nvme_authenticate_ctrlr_continue(keys_ctx);
	} else {
		bdev_nvme_set_keys_done(keys_ctx, status);
	}
}
9122 :
/*
 * Completion callback of spdk_nvme_qpair_authenticate(); ctx is the channel
 * iterator passed at submission, which is resumed with the given status.
 */
static void
bdev_nvme_authenticate_qpair_done(void *ctx, int status)
{
	struct spdk_io_channel_iter *iter = ctx;

	spdk_for_each_channel_continue(iter, status);
}
9128 :
9129 : static void
9130 0 : bdev_nvme_authenticate_qpair(struct spdk_io_channel_iter *i)
9131 : {
9132 0 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
9133 0 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
9134 0 : struct nvme_qpair *qpair = ctrlr_ch->qpair;
9135 : int rc;
9136 :
9137 0 : if (!nvme_qpair_is_connected(qpair)) {
9138 0 : spdk_for_each_channel_continue(i, 0);
9139 0 : return;
9140 : }
9141 :
9142 0 : rc = spdk_nvme_qpair_authenticate(qpair->qpair, bdev_nvme_authenticate_qpair_done, i);
9143 0 : if (rc != 0) {
9144 0 : spdk_for_each_channel_continue(i, rc);
9145 0 : }
9146 0 : }
9147 :
9148 : static void
9149 0 : bdev_nvme_authenticate_ctrlr_done(void *_ctx, int status)
9150 : {
9151 0 : struct bdev_nvme_set_keys_ctx *ctx = _ctx;
9152 :
9153 0 : if (status != 0) {
9154 0 : bdev_nvme_set_keys_done(ctx, status);
9155 0 : return;
9156 : }
9157 :
9158 0 : spdk_for_each_channel(ctx->nctrlr, bdev_nvme_authenticate_qpair, ctx,
9159 : bdev_nvme_authenticate_qpairs_done);
9160 0 : }
9161 :
9162 : static void
9163 0 : bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx)
9164 : {
9165 0 : struct spdk_nvme_ctrlr_key_opts opts = {};
9166 0 : struct nvme_ctrlr *nctrlr = ctx->nctrlr;
9167 : int rc;
9168 :
9169 0 : opts.size = SPDK_SIZEOF(&opts, dhchap_ctrlr_key);
9170 0 : opts.dhchap_key = ctx->dhchap_key;
9171 0 : opts.dhchap_ctrlr_key = ctx->dhchap_ctrlr_key;
9172 0 : rc = spdk_nvme_ctrlr_set_keys(nctrlr->ctrlr, &opts);
9173 0 : if (rc != 0) {
9174 0 : bdev_nvme_set_keys_done(ctx, rc);
9175 0 : return;
9176 : }
9177 :
9178 0 : if (ctx->dhchap_key != NULL) {
9179 0 : rc = spdk_nvme_ctrlr_authenticate(nctrlr->ctrlr,
9180 0 : bdev_nvme_authenticate_ctrlr_done, ctx);
9181 0 : if (rc != 0) {
9182 0 : bdev_nvme_set_keys_done(ctx, rc);
9183 0 : }
9184 0 : } else {
9185 0 : bdev_nvme_authenticate_ctrlr_continue(ctx);
9186 : }
9187 0 : }
9188 :
9189 : int
9190 0 : bdev_nvme_set_keys(const char *name, const char *dhchap_key, const char *dhchap_ctrlr_key,
9191 : bdev_nvme_set_keys_cb cb_fn, void *cb_ctx)
9192 : {
9193 : struct bdev_nvme_set_keys_ctx *ctx;
9194 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
9195 : struct nvme_ctrlr *nctrlr;
9196 :
9197 0 : ctx = calloc(1, sizeof(*ctx));
9198 0 : if (ctx == NULL) {
9199 0 : return -ENOMEM;
9200 : }
9201 :
9202 0 : if (dhchap_key != NULL) {
9203 0 : ctx->dhchap_key = spdk_keyring_get_key(dhchap_key);
9204 0 : if (ctx->dhchap_key == NULL) {
9205 0 : SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_key, name);
9206 0 : bdev_nvme_free_set_keys_ctx(ctx);
9207 0 : return -ENOKEY;
9208 : }
9209 0 : }
9210 0 : if (dhchap_ctrlr_key != NULL) {
9211 0 : ctx->dhchap_ctrlr_key = spdk_keyring_get_key(dhchap_ctrlr_key);
9212 0 : if (ctx->dhchap_ctrlr_key == NULL) {
9213 0 : SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_ctrlr_key, name);
9214 0 : bdev_nvme_free_set_keys_ctx(ctx);
9215 0 : return -ENOKEY;
9216 : }
9217 0 : }
9218 :
9219 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
9220 0 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
9221 0 : if (nbdev_ctrlr == NULL) {
9222 0 : SPDK_ERRLOG("Could not find bdev_ctrlr %s\n", name);
9223 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
9224 0 : bdev_nvme_free_set_keys_ctx(ctx);
9225 0 : return -ENODEV;
9226 : }
9227 0 : nctrlr = bdev_nvme_next_ctrlr_unsafe(nbdev_ctrlr, NULL);
9228 0 : if (nctrlr == NULL) {
9229 0 : SPDK_ERRLOG("Could not find any nvme_ctrlrs on bdev_ctrlr %s\n", name);
9230 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
9231 0 : bdev_nvme_free_set_keys_ctx(ctx);
9232 0 : return -ENODEV;
9233 : }
9234 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
9235 :
9236 0 : ctx->nctrlr = nctrlr;
9237 0 : ctx->cb_fn = cb_fn;
9238 0 : ctx->cb_ctx = cb_ctx;
9239 0 : ctx->thread = spdk_get_thread();
9240 :
9241 0 : bdev_nvme_authenticate_ctrlr(ctx);
9242 :
9243 0 : return 0;
9244 0 : }
9245 :
9246 : void
9247 0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
9248 : {
9249 0 : struct nvme_ns *nvme_ns = io_path->nvme_ns;
9250 0 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
9251 : const struct spdk_nvme_ctrlr_data *cdata;
9252 : const struct spdk_nvme_transport_id *trid;
9253 : const char *adrfam_str;
9254 :
9255 0 : spdk_json_write_object_begin(w);
9256 :
9257 0 : spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
9258 :
9259 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
9260 0 : trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
9261 :
9262 0 : spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
9263 0 : spdk_json_write_named_bool(w, "current", nvme_io_path_is_current(io_path));
9264 0 : spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
9265 0 : spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
9266 :
9267 0 : spdk_json_write_named_object_begin(w, "transport");
9268 0 : spdk_json_write_named_string(w, "trtype", trid->trstring);
9269 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
9270 0 : if (trid->trsvcid[0] != '\0') {
9271 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
9272 0 : }
9273 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
9274 0 : if (adrfam_str) {
9275 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
9276 0 : }
9277 0 : spdk_json_write_object_end(w);
9278 :
9279 0 : spdk_json_write_object_end(w);
9280 0 : }
9281 :
9282 : void
9283 0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
9284 : {
9285 : struct discovery_ctx *ctx;
9286 : struct discovery_entry_ctx *entry_ctx;
9287 :
9288 0 : spdk_json_write_array_begin(w);
9289 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
9290 0 : spdk_json_write_object_begin(w);
9291 0 : spdk_json_write_named_string(w, "name", ctx->name);
9292 :
9293 0 : spdk_json_write_named_object_begin(w, "trid");
9294 0 : nvme_bdev_dump_trid_json(&ctx->trid, w);
9295 0 : spdk_json_write_object_end(w);
9296 :
9297 0 : spdk_json_write_named_array_begin(w, "referrals");
9298 0 : TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
9299 0 : spdk_json_write_object_begin(w);
9300 0 : spdk_json_write_named_object_begin(w, "trid");
9301 0 : nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
9302 0 : spdk_json_write_object_end(w);
9303 0 : spdk_json_write_object_end(w);
9304 0 : }
9305 0 : spdk_json_write_array_end(w);
9306 :
9307 0 : spdk_json_write_object_end(w);
9308 0 : }
9309 0 : spdk_json_write_array_end(w);
9310 0 : }
9311 :
9312 1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
9313 :
9314 : static void
9315 0 : bdev_nvme_trace(void)
9316 : {
9317 0 : struct spdk_trace_tpoint_opts opts[] = {
9318 : {
9319 : "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
9320 : OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 1,
9321 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
9322 : },
9323 : {
9324 : "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
9325 : OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 0,
9326 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
9327 : }
9328 : };
9329 :
9330 :
9331 0 : spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
9332 0 : spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
9333 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
9334 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
9335 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
9336 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
9337 0 : }
9338 1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)
|