Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation. All rights reserved.
3 : * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4 : * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : */
6 :
7 : #include "spdk/stdinc.h"
8 :
9 : #include "spdk/bdev.h"
10 :
11 : #include "spdk/accel.h"
12 : #include "spdk/config.h"
13 : #include "spdk/env.h"
14 : #include "spdk/thread.h"
15 : #include "spdk/likely.h"
16 : #include "spdk/queue.h"
17 : #include "spdk/nvme_spec.h"
18 : #include "spdk/scsi_spec.h"
19 : #include "spdk/notify.h"
20 : #include "spdk/util.h"
21 : #include "spdk/trace.h"
22 : #include "spdk/dma.h"
23 :
24 : #include "spdk/bdev_module.h"
25 : #include "spdk/log.h"
26 : #include "spdk/string.h"
27 :
28 : #include "bdev_internal.h"
29 : #include "spdk_internal/trace_defs.h"
30 : #include "spdk_internal/assert.h"
31 :
32 : #ifdef SPDK_CONFIG_VTUNE
33 : #include "ittnotify.h"
34 : #include "ittnotify_types.h"
35 : int __itt_init_ittlib(const char *, __itt_group_id);
36 : #endif
37 :
38 : #define SPDK_BDEV_IO_POOL_SIZE (64 * 1024 - 1)
39 : #define SPDK_BDEV_IO_CACHE_SIZE 256
40 : #define SPDK_BDEV_AUTO_EXAMINE true
41 : #define BUF_SMALL_CACHE_SIZE 128
42 : #define BUF_LARGE_CACHE_SIZE 16
43 : #define NOMEM_THRESHOLD_COUNT 8
44 :
45 : #define SPDK_BDEV_QOS_TIMESLICE_IN_USEC 1000
46 : #define SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE 1
47 : #define SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE 512
48 : #define SPDK_BDEV_QOS_MIN_IOS_PER_SEC 1000
49 : #define SPDK_BDEV_QOS_MIN_BYTES_PER_SEC (1024 * 1024)
50 : #define SPDK_BDEV_QOS_MAX_MBYTES_PER_SEC (UINT64_MAX / (1024 * 1024))
51 : #define SPDK_BDEV_QOS_LIMIT_NOT_DEFINED UINT64_MAX
52 : #define SPDK_BDEV_IO_POLL_INTERVAL_IN_MSEC 1000
53 :
54 : /* The maximum number of child requests submitted at a time when splitting
55 : * a UNMAP or WRITE ZEROES command.
56 : */
57 : #define SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS (8)
58 : #define BDEV_RESET_CHECK_OUTSTANDING_IO_PERIOD 1000000
59 :
60 : /* The maximum number of child requests submitted at a time when splitting
61 : * a COPY command.
62 : */
63 : #define SPDK_BDEV_MAX_CHILDREN_COPY_REQS (8)
64 :
65 : #define LOG_ALREADY_CLAIMED_ERROR(detail, bdev) \
66 : log_already_claimed(SPDK_LOG_ERROR, __LINE__, __func__, detail, bdev)
67 : #ifdef DEBUG
68 : #define LOG_ALREADY_CLAIMED_DEBUG(detail, bdev) \
69 : log_already_claimed(SPDK_LOG_DEBUG, __LINE__, __func__, detail, bdev)
70 : #else
71 : #define LOG_ALREADY_CLAIMED_DEBUG(detail, bdev) do {} while(0)
72 : #endif
73 :
74 : static void log_already_claimed(enum spdk_log_level level, const int line, const char *func,
75 : const char *detail, struct spdk_bdev *bdev);
76 :
77 : static const char *qos_rpc_type[] = {"rw_ios_per_sec",
78 : "rw_mbytes_per_sec", "r_mbytes_per_sec", "w_mbytes_per_sec"
79 : };
80 :
81 : TAILQ_HEAD(spdk_bdev_list, spdk_bdev);
82 :
83 : RB_HEAD(bdev_name_tree, spdk_bdev_name);
84 :
85 : static int
86 546 : bdev_name_cmp(struct spdk_bdev_name *name1, struct spdk_bdev_name *name2)
87 : {
88 546 : return strcmp(name1->name, name2->name);
89 : }
90 :
91 2056 : RB_GENERATE_STATIC(bdev_name_tree, spdk_bdev_name, node, bdev_name_cmp);
92 :
93 : struct spdk_bdev_mgr {
94 : struct spdk_mempool *bdev_io_pool;
95 :
96 : void *zero_buffer;
97 :
98 : TAILQ_HEAD(bdev_module_list, spdk_bdev_module) bdev_modules;
99 :
100 : struct spdk_bdev_list bdevs;
101 : struct bdev_name_tree bdev_names;
102 :
103 : bool init_complete;
104 : bool module_init_complete;
105 :
106 : struct spdk_spinlock spinlock;
107 :
108 : TAILQ_HEAD(, spdk_bdev_open_async_ctx) async_bdev_opens;
109 :
110 : #ifdef SPDK_CONFIG_VTUNE
111 : __itt_domain *domain;
112 : #endif
113 : };
114 :
115 : static struct spdk_bdev_mgr g_bdev_mgr = {
116 : .bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
117 : .bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
118 : .bdev_names = RB_INITIALIZER(g_bdev_mgr.bdev_names),
119 : .init_complete = false,
120 : .module_init_complete = false,
121 : .async_bdev_opens = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.async_bdev_opens),
122 : };
123 :
124 : static void
125 : __attribute__((constructor))
126 3 : _bdev_init(void)
127 : {
128 3 : spdk_spin_init(&g_bdev_mgr.spinlock);
129 3 : }
130 :
131 : typedef void (*lock_range_cb)(struct lba_range *range, void *ctx, int status);
132 :
133 : typedef void (*bdev_copy_bounce_buffer_cpl)(void *ctx, int rc);
134 :
135 : struct lba_range {
136 : struct spdk_bdev *bdev;
137 : uint64_t offset;
138 : uint64_t length;
139 : bool quiesce;
140 : void *locked_ctx;
141 : struct spdk_thread *owner_thread;
142 : struct spdk_bdev_channel *owner_ch;
143 : TAILQ_ENTRY(lba_range) tailq;
144 : TAILQ_ENTRY(lba_range) tailq_module;
145 : };
146 :
147 : static struct spdk_bdev_opts g_bdev_opts = {
148 : .bdev_io_pool_size = SPDK_BDEV_IO_POOL_SIZE,
149 : .bdev_io_cache_size = SPDK_BDEV_IO_CACHE_SIZE,
150 : .bdev_auto_examine = SPDK_BDEV_AUTO_EXAMINE,
151 : .iobuf_small_cache_size = BUF_SMALL_CACHE_SIZE,
152 : .iobuf_large_cache_size = BUF_LARGE_CACHE_SIZE,
153 : };
154 :
155 : static spdk_bdev_init_cb g_init_cb_fn = NULL;
156 : static void *g_init_cb_arg = NULL;
157 :
158 : static spdk_bdev_fini_cb g_fini_cb_fn = NULL;
159 : static void *g_fini_cb_arg = NULL;
160 : static struct spdk_thread *g_fini_thread = NULL;
161 :
162 : struct spdk_bdev_qos_limit {
163 : /** IOs or bytes allowed per second (i.e., 1s). */
164 : uint64_t limit;
165 :
166 : /** Remaining IOs or bytes allowed in current timeslice (e.g., 1ms).
167 : * For remaining bytes, allowed to run negative if an I/O is submitted when
168 : * some bytes are remaining, but the I/O is bigger than that amount. The
169 : * excess will be deducted from the next timeslice.
170 : */
171 : int64_t remaining_this_timeslice;
172 :
173 : /** Minimum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */
174 : uint32_t min_per_timeslice;
175 :
176 : /** Maximum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */
177 : uint32_t max_per_timeslice;
178 :
179 : /** Function to check whether to queue the IO.
180 : * If The IO is allowed to pass, the quota will be reduced correspondingly.
181 : */
182 : bool (*queue_io)(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);
183 :
184 : /** Function to rewind the quota once the IO has been allowed to pass by this
185 : * limit but was queued due to one of the subsequent limits.
186 : */
187 : void (*rewind_quota)(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);
188 : };
189 :
190 : struct spdk_bdev_qos {
191 : /** Rate limits, one entry per supported limit type. */
192 : struct spdk_bdev_qos_limit rate_limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
193 :
194 : /** The channel that all I/O are funneled through. */
195 : struct spdk_bdev_channel *ch;
196 :
197 : /** The thread on which the poller is running. */
198 : struct spdk_thread *thread;
199 :
200 : /** Size of a timeslice in tsc ticks. */
201 : uint64_t timeslice_size;
202 :
203 : /** Timestamp of start of last timeslice. */
204 : uint64_t last_timeslice;
205 :
206 : /** Poller that processes queued I/O commands each time slice. */
207 : struct spdk_poller *poller;
208 : };
209 :
210 : struct spdk_bdev_mgmt_channel {
211 : /*
212 : * Each thread keeps a cache of bdev_io - this allows
213 : * bdev threads which are *not* DPDK threads to still
214 : * benefit from a per-thread bdev_io cache. Without
215 : * this, non-DPDK threads fetching from the mempool
216 : * incur a cmpxchg on get and put.
217 : */
218 : bdev_io_stailq_t per_thread_cache;
219 : uint32_t per_thread_cache_count;
220 : uint32_t bdev_io_cache_size;
221 :
222 : struct spdk_iobuf_channel iobuf;
223 :
224 : TAILQ_HEAD(, spdk_bdev_shared_resource) shared_resources;
225 : TAILQ_HEAD(, spdk_bdev_io_wait_entry) io_wait_queue;
226 : };
227 :
228 : /*
229 : * Per-module (or per-io_device) data. Multiple bdevs built on the same io_device
230 : * will queue their IO awaiting retry here. This makes it possible to retry sending
231 : * IO to one bdev after IO from another bdev completes.
232 : */
233 : struct spdk_bdev_shared_resource {
234 : /* The bdev management channel */
235 : struct spdk_bdev_mgmt_channel *mgmt_ch;
236 :
237 : /*
238 : * Count of I/O submitted to bdev module and waiting for completion.
239 : * Incremented before submit_request() is called on an spdk_bdev_io.
240 : */
241 : uint64_t io_outstanding;
242 :
243 : /*
244 : * Queue of IO awaiting retry because of a previous NOMEM status returned
245 : * on this channel.
246 : */
247 : bdev_io_tailq_t nomem_io;
248 :
249 : /*
250 : * Threshold which io_outstanding must drop to before retrying nomem_io.
251 : */
252 : uint64_t nomem_threshold;
253 :
254 : /* I/O channel allocated by a bdev module */
255 : struct spdk_io_channel *shared_ch;
256 :
257 : struct spdk_poller *nomem_poller;
258 :
259 : /* Refcount of bdev channels using this resource */
260 : uint32_t ref;
261 :
262 : TAILQ_ENTRY(spdk_bdev_shared_resource) link;
263 : };
264 :
265 : #define BDEV_CH_RESET_IN_PROGRESS (1 << 0)
266 : #define BDEV_CH_QOS_ENABLED (1 << 1)
267 :
268 : struct spdk_bdev_channel {
269 : struct spdk_bdev *bdev;
270 :
271 : /* The channel for the underlying device */
272 : struct spdk_io_channel *channel;
273 :
274 : /* Accel channel */
275 : struct spdk_io_channel *accel_channel;
276 :
277 : /* Per io_device per thread data */
278 : struct spdk_bdev_shared_resource *shared_resource;
279 :
280 : struct spdk_bdev_io_stat *stat;
281 :
282 : /*
283 : * Count of I/O submitted to the underlying dev module through this channel
284 : * and waiting for completion.
285 : */
286 : uint64_t io_outstanding;
287 :
288 : /*
289 : * List of all submitted I/Os including I/O that are generated via splitting.
290 : */
291 : bdev_io_tailq_t io_submitted;
292 :
293 : /*
294 : * List of spdk_bdev_io that are currently queued because they write to a locked
295 : * LBA range.
296 : */
297 : bdev_io_tailq_t io_locked;
298 :
299 : /* List of I/Os with accel sequence being currently executed */
300 : bdev_io_tailq_t io_accel_exec;
301 :
302 : /* List of I/Os doing memory domain pull/push */
303 : bdev_io_tailq_t io_memory_domain;
304 :
305 : uint32_t flags;
306 :
307 : /* Counts number of bdev_io in the io_submitted TAILQ */
308 : uint16_t queue_depth;
309 :
310 : uint16_t trace_id;
311 :
312 : struct spdk_histogram_data *histogram;
313 :
314 : #ifdef SPDK_CONFIG_VTUNE
315 : uint64_t start_tsc;
316 : uint64_t interval_tsc;
317 : __itt_string_handle *handle;
318 : struct spdk_bdev_io_stat *prev_stat;
319 : #endif
320 :
321 : bdev_io_tailq_t queued_resets;
322 :
323 : lba_range_tailq_t locked_ranges;
324 :
325 : /** List of I/Os queued by QoS. */
326 : bdev_io_tailq_t qos_queued_io;
327 : };
328 :
329 : struct media_event_entry {
330 : struct spdk_bdev_media_event event;
331 : TAILQ_ENTRY(media_event_entry) tailq;
332 : };
333 :
334 : #define MEDIA_EVENT_POOL_SIZE 64
335 :
336 : struct spdk_bdev_desc {
337 : struct spdk_bdev *bdev;
338 : bool write;
339 : bool memory_domains_supported;
340 : bool accel_sequence_supported[SPDK_BDEV_NUM_IO_TYPES];
341 : struct spdk_bdev_open_opts opts;
342 : struct spdk_thread *thread;
343 : struct {
344 : spdk_bdev_event_cb_t event_fn;
345 : void *ctx;
346 : } callback;
347 : bool closed;
348 : struct spdk_spinlock spinlock;
349 : uint32_t refs;
350 : TAILQ_HEAD(, media_event_entry) pending_media_events;
351 : TAILQ_HEAD(, media_event_entry) free_media_events;
352 : struct media_event_entry *media_events_buffer;
353 : TAILQ_ENTRY(spdk_bdev_desc) link;
354 :
355 : uint64_t timeout_in_sec;
356 : spdk_bdev_io_timeout_cb cb_fn;
357 : void *cb_arg;
358 : struct spdk_poller *io_timeout_poller;
359 : struct spdk_bdev_module_claim *claim;
360 : };
361 :
362 : struct spdk_bdev_iostat_ctx {
363 : struct spdk_bdev_io_stat *stat;
364 : enum spdk_bdev_reset_stat_mode reset_mode;
365 : spdk_bdev_get_device_stat_cb cb;
366 : void *cb_arg;
367 : };
368 :
369 : struct set_qos_limit_ctx {
370 : void (*cb_fn)(void *cb_arg, int status);
371 : void *cb_arg;
372 : struct spdk_bdev *bdev;
373 : };
374 :
375 : struct spdk_bdev_channel_iter {
376 : spdk_bdev_for_each_channel_msg fn;
377 : spdk_bdev_for_each_channel_done cpl;
378 : struct spdk_io_channel_iter *i;
379 : void *ctx;
380 : };
381 :
382 : struct spdk_bdev_io_error_stat {
383 : uint32_t error_status[-SPDK_MIN_BDEV_IO_STATUS];
384 : };
385 :
386 : enum bdev_io_retry_state {
387 : BDEV_IO_RETRY_STATE_INVALID,
388 : BDEV_IO_RETRY_STATE_PULL,
389 : BDEV_IO_RETRY_STATE_PULL_MD,
390 : BDEV_IO_RETRY_STATE_SUBMIT,
391 : BDEV_IO_RETRY_STATE_PUSH,
392 : BDEV_IO_RETRY_STATE_PUSH_MD,
393 : };
394 :
395 : #define __bdev_to_io_dev(bdev) (((char *)bdev) + 1)
396 : #define __bdev_from_io_dev(io_dev) ((struct spdk_bdev *)(((char *)io_dev) - 1))
397 : #define __io_ch_to_bdev_ch(io_ch) ((struct spdk_bdev_channel *)spdk_io_channel_get_ctx(io_ch))
398 : #define __io_ch_to_bdev_mgmt_ch(io_ch) ((struct spdk_bdev_mgmt_channel *)spdk_io_channel_get_ctx(io_ch))
399 :
400 : static inline void bdev_io_complete(void *ctx);
401 : static inline void bdev_io_complete_unsubmitted(struct spdk_bdev_io *bdev_io);
402 : static void bdev_io_push_bounce_md_buf(struct spdk_bdev_io *bdev_io);
403 : static void bdev_io_push_bounce_data(struct spdk_bdev_io *bdev_io);
404 :
405 : static void bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
406 : static int bdev_write_zero_buffer(struct spdk_bdev_io *bdev_io);
407 :
408 : static void bdev_enable_qos_msg(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
409 : struct spdk_io_channel *ch, void *_ctx);
410 : static void bdev_enable_qos_done(struct spdk_bdev *bdev, void *_ctx, int status);
411 :
412 : static int bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
413 : struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
414 : uint64_t num_blocks,
415 : struct spdk_memory_domain *domain, void *domain_ctx,
416 : struct spdk_accel_sequence *seq, uint32_t dif_check_flags,
417 : spdk_bdev_io_completion_cb cb, void *cb_arg);
418 : static int bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
419 : struct iovec *iov, int iovcnt, void *md_buf,
420 : uint64_t offset_blocks, uint64_t num_blocks,
421 : struct spdk_memory_domain *domain, void *domain_ctx,
422 : struct spdk_accel_sequence *seq, uint32_t dif_check_flags,
423 : uint32_t nvme_cdw12_raw, uint32_t nvme_cdw13_raw,
424 : spdk_bdev_io_completion_cb cb, void *cb_arg);
425 :
426 : static int bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
427 : uint64_t offset, uint64_t length,
428 : lock_range_cb cb_fn, void *cb_arg);
429 :
430 : static int bdev_unlock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
431 : uint64_t offset, uint64_t length,
432 : lock_range_cb cb_fn, void *cb_arg);
433 :
434 : static bool bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_io *bio_to_abort);
435 : static bool bdev_abort_buf_io(struct spdk_bdev_mgmt_channel *ch, struct spdk_bdev_io *bio_to_abort);
436 :
437 : static bool claim_type_is_v2(enum spdk_bdev_claim_type type);
438 : static void bdev_desc_release_claims(struct spdk_bdev_desc *desc);
439 : static void claim_reset(struct spdk_bdev *bdev);
440 :
441 : static void bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch);
442 :
443 : static bool bdev_io_should_split(struct spdk_bdev_io *bdev_io);
444 :
445 : #define bdev_get_ext_io_opt(opts, field, defval) \
446 : ((opts) != NULL ? SPDK_GET_FIELD(opts, field, defval) : (defval))
447 :
448 : static inline void
449 669 : bdev_ch_add_to_io_submitted(struct spdk_bdev_io *bdev_io)
450 : {
451 669 : TAILQ_INSERT_TAIL(&bdev_io->internal.ch->io_submitted, bdev_io, internal.ch_link);
452 669 : bdev_io->internal.ch->queue_depth++;
453 669 : }
454 :
455 : static inline void
456 669 : bdev_ch_remove_from_io_submitted(struct spdk_bdev_io *bdev_io)
457 : {
458 669 : TAILQ_REMOVE(&bdev_io->internal.ch->io_submitted, bdev_io, internal.ch_link);
459 669 : bdev_io->internal.ch->queue_depth--;
460 669 : }
461 :
462 : void
463 14 : spdk_bdev_get_opts(struct spdk_bdev_opts *opts, size_t opts_size)
464 : {
465 14 : if (!opts) {
466 0 : SPDK_ERRLOG("opts should not be NULL\n");
467 0 : return;
468 : }
469 :
470 14 : if (!opts_size) {
471 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
472 0 : return;
473 : }
474 :
475 14 : opts->opts_size = opts_size;
476 :
477 : #define SET_FIELD(field) \
478 : if (offsetof(struct spdk_bdev_opts, field) + sizeof(opts->field) <= opts_size) { \
479 : opts->field = g_bdev_opts.field; \
480 : } \
481 :
482 14 : SET_FIELD(bdev_io_pool_size);
483 14 : SET_FIELD(bdev_io_cache_size);
484 14 : SET_FIELD(bdev_auto_examine);
485 14 : SET_FIELD(iobuf_small_cache_size);
486 14 : SET_FIELD(iobuf_large_cache_size);
487 :
488 : /* Do not remove this statement, you should always update this statement when you adding a new field,
489 : * and do not forget to add the SET_FIELD statement for your added field. */
490 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_opts) == 32, "Incorrect size");
491 :
492 : #undef SET_FIELD
493 14 : }
494 :
495 : int
496 15 : spdk_bdev_set_opts(struct spdk_bdev_opts *opts)
497 : {
498 : uint32_t min_pool_size;
499 :
500 15 : if (!opts) {
501 0 : SPDK_ERRLOG("opts cannot be NULL\n");
502 0 : return -1;
503 : }
504 :
505 15 : if (!opts->opts_size) {
506 1 : SPDK_ERRLOG("opts_size inside opts cannot be zero value\n");
507 1 : return -1;
508 : }
509 :
510 : /*
511 : * Add 1 to the thread count to account for the extra mgmt_ch that gets created during subsystem
512 : * initialization. A second mgmt_ch will be created on the same thread when the application starts
513 : * but before the deferred put_io_channel event is executed for the first mgmt_ch.
514 : */
515 14 : min_pool_size = opts->bdev_io_cache_size * (spdk_thread_get_count() + 1);
516 14 : if (opts->bdev_io_pool_size < min_pool_size) {
517 0 : SPDK_ERRLOG("bdev_io_pool_size %" PRIu32 " is not compatible with bdev_io_cache_size %" PRIu32
518 : " and %" PRIu32 " threads\n", opts->bdev_io_pool_size, opts->bdev_io_cache_size,
519 : spdk_thread_get_count());
520 0 : SPDK_ERRLOG("bdev_io_pool_size must be at least %" PRIu32 "\n", min_pool_size);
521 0 : return -1;
522 : }
523 :
524 : #define SET_FIELD(field) \
525 : if (offsetof(struct spdk_bdev_opts, field) + sizeof(opts->field) <= opts->opts_size) { \
526 : g_bdev_opts.field = opts->field; \
527 : } \
528 :
529 14 : SET_FIELD(bdev_io_pool_size);
530 14 : SET_FIELD(bdev_io_cache_size);
531 14 : SET_FIELD(bdev_auto_examine);
532 14 : SET_FIELD(iobuf_small_cache_size);
533 14 : SET_FIELD(iobuf_large_cache_size);
534 :
535 14 : g_bdev_opts.opts_size = opts->opts_size;
536 :
537 : #undef SET_FIELD
538 :
539 14 : return 0;
540 15 : }
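/*
 * Typical caller pattern for the two functions above (a minimal, illustrative
 * sketch; the field values are examples only): read the current defaults,
 * override selected fields, then apply them before the bdev subsystem is
 * initialized.
 *
 *	struct spdk_bdev_opts opts = {};
 *
 *	spdk_bdev_get_opts(&opts, sizeof(opts));
 *	opts.bdev_io_pool_size = 32 * 1024 - 1;
 *	opts.bdev_auto_examine = false;
 *	if (spdk_bdev_set_opts(&opts) != 0) {
 *		SPDK_ERRLOG("Failed to set bdev options\n");
 *	}
 */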
541 :
542 : static struct spdk_bdev *
543 146 : bdev_get_by_name(const char *bdev_name)
544 : {
545 : struct spdk_bdev_name find;
546 : struct spdk_bdev_name *res;
547 :
548 146 : find.name = (char *)bdev_name;
549 146 : res = RB_FIND(bdev_name_tree, &g_bdev_mgr.bdev_names, &find);
550 146 : if (res != NULL) {
551 139 : return res->bdev;
552 : }
553 :
554 7 : return NULL;
555 146 : }
556 :
557 : struct spdk_bdev *
558 16 : spdk_bdev_get_by_name(const char *bdev_name)
559 : {
560 : struct spdk_bdev *bdev;
561 :
562 16 : spdk_spin_lock(&g_bdev_mgr.spinlock);
563 16 : bdev = bdev_get_by_name(bdev_name);
564 16 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
565 :
566 16 : return bdev;
567 : }
568 :
569 : struct bdev_io_status_string {
570 : enum spdk_bdev_io_status status;
571 : const char *str;
572 : };
573 :
574 : static const struct bdev_io_status_string bdev_io_status_strings[] = {
575 : { SPDK_BDEV_IO_STATUS_AIO_ERROR, "aio_error" },
576 : { SPDK_BDEV_IO_STATUS_ABORTED, "aborted" },
577 : { SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED, "first_fused_failed" },
578 : { SPDK_BDEV_IO_STATUS_MISCOMPARE, "miscompare" },
579 : { SPDK_BDEV_IO_STATUS_NOMEM, "nomem" },
580 : { SPDK_BDEV_IO_STATUS_SCSI_ERROR, "scsi_error" },
581 : { SPDK_BDEV_IO_STATUS_NVME_ERROR, "nvme_error" },
582 : { SPDK_BDEV_IO_STATUS_FAILED, "failed" },
583 : { SPDK_BDEV_IO_STATUS_PENDING, "pending" },
584 : { SPDK_BDEV_IO_STATUS_SUCCESS, "success" },
585 : };
586 :
587 : static const char *
588 0 : bdev_io_status_get_string(enum spdk_bdev_io_status status)
589 : {
590 : uint32_t i;
591 :
592 0 : for (i = 0; i < SPDK_COUNTOF(bdev_io_status_strings); i++) {
593 0 : if (bdev_io_status_strings[i].status == status) {
594 0 : return bdev_io_status_strings[i].str;
595 : }
596 0 : }
597 :
598 0 : return "reserved";
599 0 : }
600 :
601 : struct spdk_bdev_wait_for_examine_ctx {
602 : struct spdk_poller *poller;
603 : spdk_bdev_wait_for_examine_cb cb_fn;
604 : void *cb_arg;
605 : };
606 :
607 : static bool bdev_module_all_actions_completed(void);
608 :
609 : static int
610 192 : bdev_wait_for_examine_cb(void *arg)
611 : {
612 192 : struct spdk_bdev_wait_for_examine_ctx *ctx = arg;
613 :
614 192 : if (!bdev_module_all_actions_completed()) {
615 0 : return SPDK_POLLER_IDLE;
616 : }
617 :
618 192 : spdk_poller_unregister(&ctx->poller);
619 192 : ctx->cb_fn(ctx->cb_arg);
620 192 : free(ctx);
621 :
622 192 : return SPDK_POLLER_BUSY;
623 192 : }
624 :
625 : int
626 192 : spdk_bdev_wait_for_examine(spdk_bdev_wait_for_examine_cb cb_fn, void *cb_arg)
627 : {
628 : struct spdk_bdev_wait_for_examine_ctx *ctx;
629 :
630 192 : ctx = calloc(1, sizeof(*ctx));
631 192 : if (ctx == NULL) {
632 0 : return -ENOMEM;
633 : }
634 192 : ctx->cb_fn = cb_fn;
635 192 : ctx->cb_arg = cb_arg;
636 192 : ctx->poller = SPDK_POLLER_REGISTER(bdev_wait_for_examine_cb, ctx, 0);
637 :
638 192 : return 0;
639 192 : }
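/*
 * Illustrative usage sketch (the callback name is hypothetical): the callback
 * is invoked on the calling thread once all registered bdev modules have
 * completed their outstanding examine actions.
 *
 *	static void
 *	examine_done_cb(void *ctx)
 *	{
 *		SPDK_NOTICELOG("bdev examine completed\n");
 *	}
 *
 *	...
 *	if (spdk_bdev_wait_for_examine(examine_done_cb, NULL) != 0) {
 *		SPDK_ERRLOG("Failed to register examine completion callback\n");
 *	}
 */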
640 :
641 : struct spdk_bdev_examine_item {
642 : char *name;
643 : TAILQ_ENTRY(spdk_bdev_examine_item) link;
644 : };
645 :
646 : TAILQ_HEAD(spdk_bdev_examine_allowlist, spdk_bdev_examine_item);
647 :
648 : struct spdk_bdev_examine_allowlist g_bdev_examine_allowlist = TAILQ_HEAD_INITIALIZER(
649 : g_bdev_examine_allowlist);
650 :
651 : static inline bool
652 6 : bdev_examine_allowlist_check(const char *name)
653 : {
654 : struct spdk_bdev_examine_item *item;
655 6 : TAILQ_FOREACH(item, &g_bdev_examine_allowlist, link) {
656 0 : if (strcmp(name, item->name) == 0) {
657 0 : return true;
658 : }
659 0 : }
660 6 : return false;
661 6 : }
662 :
663 : static inline void
664 65 : bdev_examine_allowlist_free(void)
665 : {
666 : struct spdk_bdev_examine_item *item;
667 65 : while (!TAILQ_EMPTY(&g_bdev_examine_allowlist)) {
668 0 : item = TAILQ_FIRST(&g_bdev_examine_allowlist);
669 0 : TAILQ_REMOVE(&g_bdev_examine_allowlist, item, link);
670 0 : free(item->name);
671 0 : free(item);
672 : }
673 65 : }
674 :
675 : static inline bool
676 3 : bdev_in_examine_allowlist(struct spdk_bdev *bdev)
677 : {
678 : struct spdk_bdev_alias *tmp;
679 3 : if (bdev_examine_allowlist_check(bdev->name)) {
680 0 : return true;
681 : }
682 6 : TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
683 3 : if (bdev_examine_allowlist_check(tmp->alias.name)) {
684 0 : return true;
685 : }
686 3 : }
687 3 : return false;
688 3 : }
689 :
690 : static inline bool
691 125 : bdev_ok_to_examine(struct spdk_bdev *bdev)
692 : {
693 : /* Some bdevs may not support the READ command.
694 : * Do not try to examine them.
695 : */
696 125 : if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_READ)) {
697 0 : return false;
698 : }
699 :
700 125 : if (g_bdev_opts.bdev_auto_examine) {
701 122 : return true;
702 : } else {
703 3 : return bdev_in_examine_allowlist(bdev);
704 : }
705 125 : }
706 :
707 : static void
708 125 : bdev_examine(struct spdk_bdev *bdev)
709 : {
710 : struct spdk_bdev_module *module;
711 : struct spdk_bdev_module_claim *claim, *tmpclaim;
712 : uint32_t action;
713 :
714 125 : if (!bdev_ok_to_examine(bdev)) {
715 3 : return;
716 : }
717 :
718 499 : TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
719 377 : if (module->examine_config) {
720 255 : spdk_spin_lock(&module->internal.spinlock);
721 255 : action = module->internal.action_in_progress;
722 255 : module->internal.action_in_progress++;
723 255 : spdk_spin_unlock(&module->internal.spinlock);
724 255 : module->examine_config(bdev);
725 255 : if (action != module->internal.action_in_progress) {
726 0 : SPDK_ERRLOG("examine_config for module %s did not call "
727 : "spdk_bdev_module_examine_done()\n", module->name);
728 0 : }
729 255 : }
730 377 : }
731 :
732 122 : spdk_spin_lock(&bdev->internal.spinlock);
733 :
734 122 : switch (bdev->internal.claim_type) {
735 : case SPDK_BDEV_CLAIM_NONE:
736 : /* Examine by all bdev modules */
737 474 : TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
738 357 : if (module->examine_disk) {
739 231 : spdk_spin_lock(&module->internal.spinlock);
740 231 : module->internal.action_in_progress++;
741 231 : spdk_spin_unlock(&module->internal.spinlock);
742 231 : spdk_spin_unlock(&bdev->internal.spinlock);
743 231 : module->examine_disk(bdev);
744 231 : spdk_spin_lock(&bdev->internal.spinlock);
745 231 : }
746 357 : }
747 117 : break;
748 : case SPDK_BDEV_CLAIM_EXCL_WRITE:
749 : /* Examine by the one bdev module with a v1 claim */
750 1 : module = bdev->internal.claim.v1.module;
751 1 : if (module->examine_disk) {
752 1 : spdk_spin_lock(&module->internal.spinlock);
753 1 : module->internal.action_in_progress++;
754 1 : spdk_spin_unlock(&module->internal.spinlock);
755 1 : spdk_spin_unlock(&bdev->internal.spinlock);
756 1 : module->examine_disk(bdev);
757 1 : return;
758 : }
759 0 : break;
760 : default:
761 : /* Examine by all bdev modules with a v2 claim */
762 4 : assert(claim_type_is_v2(bdev->internal.claim_type));
763 : /*
764 : * Removal of tailq nodes while iterating can cause the iteration to jump out of the
765 : * list, perhaps accessing freed memory. Without protection, this could happen
766 : * while the lock is dropped during the examine callback.
767 : */
768 4 : bdev->internal.examine_in_progress++;
769 :
770 9 : TAILQ_FOREACH(claim, &bdev->internal.claim.v2.claims, link) {
771 5 : module = claim->module;
772 :
773 5 : if (module == NULL) {
774 : /* This is a vestigial claim, held by examine_count */
775 0 : continue;
776 : }
777 :
778 5 : if (module->examine_disk == NULL) {
779 0 : continue;
780 : }
781 :
782 5 : spdk_spin_lock(&module->internal.spinlock);
783 5 : module->internal.action_in_progress++;
784 5 : spdk_spin_unlock(&module->internal.spinlock);
785 :
786 : /* Call examine_disk without holding internal.spinlock. */
787 5 : spdk_spin_unlock(&bdev->internal.spinlock);
788 5 : module->examine_disk(bdev);
789 5 : spdk_spin_lock(&bdev->internal.spinlock);
790 5 : }
791 :
792 4 : assert(bdev->internal.examine_in_progress > 0);
793 4 : bdev->internal.examine_in_progress--;
794 4 : if (bdev->internal.examine_in_progress == 0) {
795 : /* Remove any claims that were released during examine_disk */
796 9 : TAILQ_FOREACH_SAFE(claim, &bdev->internal.claim.v2.claims, link, tmpclaim) {
797 5 : if (claim->desc != NULL) {
798 5 : continue;
799 : }
800 :
801 0 : TAILQ_REMOVE(&bdev->internal.claim.v2.claims, claim, link);
802 0 : free(claim);
803 0 : }
804 4 : if (TAILQ_EMPTY(&bdev->internal.claim.v2.claims)) {
805 0 : claim_reset(bdev);
806 0 : }
807 4 : }
808 4 : }
809 :
810 121 : spdk_spin_unlock(&bdev->internal.spinlock);
811 125 : }
812 :
813 : int
814 1 : spdk_bdev_examine(const char *name)
815 : {
816 : struct spdk_bdev *bdev;
817 : struct spdk_bdev_examine_item *item;
818 1 : struct spdk_thread *thread = spdk_get_thread();
819 :
820 1 : if (spdk_unlikely(!spdk_thread_is_app_thread(thread))) {
821 1 : SPDK_ERRLOG("Cannot examine bdev %s on thread %p (%s)\n", name, thread,
822 : thread ? spdk_thread_get_name(thread) : "null");
823 1 : return -EINVAL;
824 : }
825 :
826 0 : if (g_bdev_opts.bdev_auto_examine) {
827 0 : SPDK_ERRLOG("Manual examine is not allowed if auto examine is enabled\n");
828 0 : return -EINVAL;
829 : }
830 :
831 0 : if (bdev_examine_allowlist_check(name)) {
832 0 : SPDK_ERRLOG("Duplicate bdev name for manual examine: %s\n", name);
833 0 : return -EEXIST;
834 : }
835 :
836 0 : item = calloc(1, sizeof(*item));
837 0 : if (!item) {
838 0 : return -ENOMEM;
839 : }
840 0 : item->name = strdup(name);
841 0 : if (!item->name) {
842 0 : free(item);
843 0 : return -ENOMEM;
844 : }
845 0 : TAILQ_INSERT_TAIL(&g_bdev_examine_allowlist, item, link);
846 :
847 0 : bdev = spdk_bdev_get_by_name(name);
848 0 : if (bdev) {
849 0 : bdev_examine(bdev);
850 0 : }
851 0 : return 0;
852 1 : }
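/*
 * Illustrative sketch (the bdev name is an example): manual examine is only
 * permitted from the application thread and only when auto examine is
 * disabled, per the checks above.
 *
 *	rc = spdk_bdev_examine("Malloc0");
 *	if (rc != 0) {
 *		SPDK_ERRLOG("Examine of Malloc0 failed: %d\n", rc);
 *	}
 */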
853 :
854 : static inline void
855 0 : bdev_examine_allowlist_config_json(struct spdk_json_write_ctx *w)
856 : {
857 : struct spdk_bdev_examine_item *item;
858 0 : TAILQ_FOREACH(item, &g_bdev_examine_allowlist, link) {
859 0 : spdk_json_write_object_begin(w);
860 0 : spdk_json_write_named_string(w, "method", "bdev_examine");
861 0 : spdk_json_write_named_object_begin(w, "params");
862 0 : spdk_json_write_named_string(w, "name", item->name);
863 0 : spdk_json_write_object_end(w);
864 0 : spdk_json_write_object_end(w);
865 0 : }
866 0 : }
867 :
868 : struct spdk_bdev *
869 1 : spdk_bdev_first(void)
870 : {
871 : struct spdk_bdev *bdev;
872 :
873 1 : bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
874 1 : if (bdev) {
875 1 : SPDK_DEBUGLOG(bdev, "Starting bdev iteration at %s\n", bdev->name);
876 1 : }
877 :
878 1 : return bdev;
879 : }
880 :
881 : struct spdk_bdev *
882 8 : spdk_bdev_next(struct spdk_bdev *prev)
883 : {
884 : struct spdk_bdev *bdev;
885 :
886 8 : bdev = TAILQ_NEXT(prev, internal.link);
887 8 : if (bdev) {
888 7 : SPDK_DEBUGLOG(bdev, "Continuing bdev iteration at %s\n", bdev->name);
889 7 : }
890 :
891 8 : return bdev;
892 : }
893 :
894 : static struct spdk_bdev *
895 6 : _bdev_next_leaf(struct spdk_bdev *bdev)
896 : {
897 9 : while (bdev != NULL) {
898 8 : if (bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE) {
899 5 : return bdev;
900 : } else {
901 3 : bdev = TAILQ_NEXT(bdev, internal.link);
902 : }
903 : }
904 :
905 1 : return bdev;
906 6 : }
907 :
908 : struct spdk_bdev *
909 1 : spdk_bdev_first_leaf(void)
910 : {
911 : struct spdk_bdev *bdev;
912 :
913 1 : bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs));
914 :
915 1 : if (bdev) {
916 1 : SPDK_DEBUGLOG(bdev, "Starting bdev iteration at %s\n", bdev->name);
917 1 : }
918 :
919 1 : return bdev;
920 : }
921 :
922 : struct spdk_bdev *
923 5 : spdk_bdev_next_leaf(struct spdk_bdev *prev)
924 : {
925 : struct spdk_bdev *bdev;
926 :
927 5 : bdev = _bdev_next_leaf(TAILQ_NEXT(prev, internal.link));
928 :
929 5 : if (bdev) {
930 4 : SPDK_DEBUGLOG(bdev, "Continuing bdev iteration at %s\n", bdev->name);
931 4 : }
932 :
933 5 : return bdev;
934 : }
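/*
 * Minimal iteration sketch using the accessors above: walk every registered
 * bdev, or only the unclaimed "leaf" bdevs.
 *
 *	struct spdk_bdev *bdev;
 *
 *	for (bdev = spdk_bdev_first(); bdev != NULL; bdev = spdk_bdev_next(bdev)) {
 *		printf("bdev: %s\n", spdk_bdev_get_name(bdev));
 *	}
 *
 *	for (bdev = spdk_bdev_first_leaf(); bdev != NULL; bdev = spdk_bdev_next_leaf(bdev)) {
 *		printf("leaf: %s\n", spdk_bdev_get_name(bdev));
 *	}
 */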
935 :
936 : static inline bool
937 816 : bdev_io_use_memory_domain(struct spdk_bdev_io *bdev_io)
938 : {
939 816 : return bdev_io->internal.f.has_memory_domain;
940 : }
941 :
942 : static inline bool
943 1553 : bdev_io_use_accel_sequence(struct spdk_bdev_io *bdev_io)
944 : {
945 1553 : return bdev_io->internal.f.has_accel_sequence;
946 : }
947 :
948 : static inline void
949 7 : bdev_queue_nomem_io_head(struct spdk_bdev_shared_resource *shared_resource,
950 : struct spdk_bdev_io *bdev_io, enum bdev_io_retry_state state)
951 : {
952 : /* Wait for some of the outstanding I/O to complete before we retry any of the nomem_io.
953 : * Normally we will wait for NOMEM_THRESHOLD_COUNT I/O to complete but for low queue depth
954 : * channels we will instead wait for half to complete.
955 : */
956 7 : shared_resource->nomem_threshold = spdk_max((int64_t)shared_resource->io_outstanding / 2,
957 : (int64_t)shared_resource->io_outstanding - NOMEM_THRESHOLD_COUNT);
958 :
959 7 : assert(state != BDEV_IO_RETRY_STATE_INVALID);
960 7 : bdev_io->internal.retry_state = state;
961 7 : TAILQ_INSERT_HEAD(&shared_resource->nomem_io, bdev_io, internal.link);
962 7 : }
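/*
 * Worked example of the threshold computed above (NOMEM_THRESHOLD_COUNT == 8):
 * with io_outstanding == 100 the threshold becomes max(50, 92) == 92, i.e.
 * retry after 8 completions; with io_outstanding == 4 it becomes
 * max(2, -4) == 2, i.e. retry after half of the outstanding I/O completes.
 */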
963 :
964 : static inline void
965 43 : bdev_queue_nomem_io_tail(struct spdk_bdev_shared_resource *shared_resource,
966 : struct spdk_bdev_io *bdev_io, enum bdev_io_retry_state state)
967 : {
968 : /* We only queue IOs at the end of the nomem_io queue if they're submitted by the user while
969 : * the queue isn't empty, so we don't need to update the nomem_threshold here */
970 43 : assert(!TAILQ_EMPTY(&shared_resource->nomem_io));
971 :
972 43 : assert(state != BDEV_IO_RETRY_STATE_INVALID);
973 43 : bdev_io->internal.retry_state = state;
974 43 : TAILQ_INSERT_TAIL(&shared_resource->nomem_io, bdev_io, internal.link);
975 43 : }
976 :
977 : void
978 16 : spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
979 : {
980 : struct iovec *iovs;
981 :
982 16 : if (bdev_io->u.bdev.iovs == NULL) {
983 3 : bdev_io->u.bdev.iovs = &bdev_io->iov;
984 3 : bdev_io->u.bdev.iovcnt = 1;
985 3 : }
986 :
987 16 : iovs = bdev_io->u.bdev.iovs;
988 :
989 16 : assert(iovs != NULL);
990 16 : assert(bdev_io->u.bdev.iovcnt >= 1);
991 :
992 16 : iovs[0].iov_base = buf;
993 16 : iovs[0].iov_len = len;
994 16 : }
995 :
996 : void
997 3 : spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
998 : {
999 3 : assert((len / spdk_bdev_get_md_size(bdev_io->bdev)) >= bdev_io->u.bdev.num_blocks);
1000 3 : bdev_io->u.bdev.md_buf = md_buf;
1001 3 : }
1002 :
1003 : static bool
1004 167 : _is_buf_allocated(const struct iovec *iovs)
1005 : {
1006 167 : if (iovs == NULL) {
1007 6 : return false;
1008 : }
1009 :
1010 161 : return iovs[0].iov_base != NULL;
1011 167 : }
1012 :
1013 : static bool
1014 50 : _are_iovs_aligned(struct iovec *iovs, int iovcnt, uint32_t alignment)
1015 : {
1016 : int i;
1017 : uintptr_t iov_base;
1018 :
1019 50 : if (spdk_likely(alignment == 1)) {
1020 21 : return true;
1021 : }
1022 :
1023 36 : for (i = 0; i < iovcnt; i++) {
1024 29 : iov_base = (uintptr_t)iovs[i].iov_base;
1025 29 : if ((iov_base & (alignment - 1)) != 0) {
1026 22 : return false;
1027 : }
1028 7 : }
1029 :
1030 7 : return true;
1031 50 : }
1032 :
1033 : static inline bool
1034 856 : bdev_io_needs_sequence_exec(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bdev_io)
1035 : {
1036 856 : if (!bdev_io_use_accel_sequence(bdev_io)) {
1037 856 : return false;
1038 : }
1039 :
1040 : /* For now, we don't allow splitting IOs with an accel sequence and will treat them as if
1041 : * the bdev module didn't support accel sequences */
1042 0 : return !desc->accel_sequence_supported[bdev_io->type] || bdev_io->internal.f.split;
1043 856 : }
1044 :
1045 : static inline void
1046 592 : bdev_io_increment_outstanding(struct spdk_bdev_channel *bdev_ch,
1047 : struct spdk_bdev_shared_resource *shared_resource)
1048 : {
1049 592 : bdev_ch->io_outstanding++;
1050 592 : shared_resource->io_outstanding++;
1051 592 : }
1052 :
1053 : static inline void
1054 592 : bdev_io_decrement_outstanding(struct spdk_bdev_channel *bdev_ch,
1055 : struct spdk_bdev_shared_resource *shared_resource)
1056 : {
1057 592 : assert(bdev_ch->io_outstanding > 0);
1058 592 : assert(shared_resource->io_outstanding > 0);
1059 592 : bdev_ch->io_outstanding--;
1060 592 : shared_resource->io_outstanding--;
1061 592 : }
1062 :
1063 : static void
1064 0 : bdev_io_submit_sequence_cb(void *ctx, int status)
1065 : {
1066 0 : struct spdk_bdev_io *bdev_io = ctx;
1067 :
1068 0 : assert(bdev_io_use_accel_sequence(bdev_io));
1069 :
1070 0 : bdev_io->u.bdev.accel_sequence = NULL;
1071 0 : bdev_io->internal.f.has_accel_sequence = false;
1072 :
1073 0 : if (spdk_unlikely(status != 0)) {
1074 0 : SPDK_ERRLOG("Failed to execute accel sequence, status=%d\n", status);
1075 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
1076 0 : bdev_io_complete_unsubmitted(bdev_io);
1077 0 : return;
1078 : }
1079 :
1080 0 : bdev_io_submit(bdev_io);
1081 0 : }
1082 :
1083 : static void
1084 0 : bdev_io_exec_sequence_cb(void *ctx, int status)
1085 : {
1086 0 : struct spdk_bdev_io *bdev_io = ctx;
1087 0 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1088 :
1089 0 : TAILQ_REMOVE(&bdev_io->internal.ch->io_accel_exec, bdev_io, internal.link);
1090 0 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1091 :
1092 0 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1093 0 : bdev_ch_retry_io(ch);
1094 0 : }
1095 :
1096 0 : bdev_io->internal.data_transfer_cpl(bdev_io, status);
1097 0 : }
1098 :
1099 : static void
1100 0 : bdev_io_exec_sequence(struct spdk_bdev_io *bdev_io, void (*cb_fn)(void *ctx, int status))
1101 : {
1102 0 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1103 :
1104 0 : assert(bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io));
1105 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE || bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
1106 0 : assert(bdev_io_use_accel_sequence(bdev_io));
1107 :
1108 : /* Since the operations are appended during submission, they're in the opposite order than
1109 : * how we want to execute them for reads (i.e. we need to execute the most recently added
1110 : * operation first), so reverse the sequence before executing it.
1111 : */
1112 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
1113 0 : spdk_accel_sequence_reverse(bdev_io->internal.accel_sequence);
1114 0 : }
1115 :
1116 0 : TAILQ_INSERT_TAIL(&bdev_io->internal.ch->io_accel_exec, bdev_io, internal.link);
1117 0 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1118 0 : bdev_io->internal.data_transfer_cpl = cb_fn;
1119 :
1120 0 : spdk_accel_sequence_finish(bdev_io->internal.accel_sequence,
1121 0 : bdev_io_exec_sequence_cb, bdev_io);
1122 0 : }
1123 :
1124 : static void
1125 42 : bdev_io_get_buf_complete(struct spdk_bdev_io *bdev_io, bool status)
1126 : {
1127 42 : struct spdk_io_channel *ch = spdk_bdev_io_get_io_channel(bdev_io);
1128 : void *buf;
1129 :
1130 42 : if (spdk_unlikely(bdev_io->internal.get_aux_buf_cb != NULL)) {
1131 0 : buf = bdev_io->internal.buf.ptr;
1132 0 : bdev_io->internal.buf.ptr = NULL;
1133 0 : bdev_io->internal.f.has_buf = false;
1134 0 : bdev_io->internal.get_aux_buf_cb(ch, bdev_io, buf);
1135 0 : bdev_io->internal.get_aux_buf_cb = NULL;
1136 0 : } else {
1137 42 : assert(bdev_io->internal.get_buf_cb != NULL);
1138 42 : bdev_io->internal.get_buf_cb(ch, bdev_io, status);
1139 42 : bdev_io->internal.get_buf_cb = NULL;
1140 : }
1141 42 : }
1142 :
1143 : static void
1144 4 : _bdev_io_pull_buffer_cpl(void *ctx, int rc)
1145 : {
1146 4 : struct spdk_bdev_io *bdev_io = ctx;
1147 :
1148 4 : if (rc) {
1149 0 : SPDK_ERRLOG("Set bounce buffer failed with rc %d\n", rc);
1150 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
1151 0 : }
1152 4 : bdev_io_get_buf_complete(bdev_io, !rc);
1153 4 : }
1154 :
1155 : static void
1156 2 : bdev_io_pull_md_buf_done(void *ctx, int status)
1157 : {
1158 2 : struct spdk_bdev_io *bdev_io = ctx;
1159 2 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1160 :
1161 2 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1162 2 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1163 :
1164 2 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1165 0 : bdev_ch_retry_io(ch);
1166 0 : }
1167 :
1168 2 : assert(bdev_io->internal.data_transfer_cpl);
1169 2 : bdev_io->internal.data_transfer_cpl(bdev_io, status);
1170 2 : }
1171 :
1172 : static void
1173 4 : bdev_io_pull_md_buf(struct spdk_bdev_io *bdev_io)
1174 : {
1175 4 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1176 4 : int rc = 0;
1177 :
1178 4 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
1179 2 : assert(bdev_io->internal.f.has_bounce_buf);
1180 2 : if (bdev_io_use_memory_domain(bdev_io)) {
1181 2 : TAILQ_INSERT_TAIL(&ch->io_memory_domain, bdev_io, internal.link);
1182 2 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1183 4 : rc = spdk_memory_domain_pull_data(bdev_io->internal.memory_domain,
1184 2 : bdev_io->internal.memory_domain_ctx,
1185 2 : &bdev_io->internal.bounce_buf.orig_md_iov, 1,
1186 2 : &bdev_io->internal.bounce_buf.md_iov, 1,
1187 2 : bdev_io_pull_md_buf_done, bdev_io);
1188 2 : if (rc == 0) {
1189 : /* Continue to submit IO in completion callback */
1190 2 : return;
1191 : }
1192 0 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1193 0 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1194 0 : if (rc != -ENOMEM) {
1195 0 : SPDK_ERRLOG("Failed to pull data from memory domain %s, rc %d\n",
1196 : spdk_memory_domain_get_dma_device_id(
1197 : bdev_io->internal.memory_domain), rc);
1198 0 : }
1199 0 : } else {
1200 0 : memcpy(bdev_io->internal.bounce_buf.md_iov.iov_base,
1201 0 : bdev_io->internal.bounce_buf.orig_md_iov.iov_base,
1202 0 : bdev_io->internal.bounce_buf.orig_md_iov.iov_len);
1203 : }
1204 0 : }
1205 :
1206 2 : if (spdk_unlikely(rc == -ENOMEM)) {
1207 0 : bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL_MD);
1208 0 : } else {
1209 2 : assert(bdev_io->internal.data_transfer_cpl);
1210 2 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1211 : }
1212 4 : }
1213 :
1214 : static void
1215 4 : _bdev_io_pull_bounce_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
1216 : {
1217 4 : assert(bdev_io->internal.f.has_bounce_buf);
1218 :
1219 : /* save original md_buf */
1220 4 : bdev_io->internal.bounce_buf.orig_md_iov.iov_base = bdev_io->u.bdev.md_buf;
1221 4 : bdev_io->internal.bounce_buf.orig_md_iov.iov_len = len;
1222 4 : bdev_io->internal.bounce_buf.md_iov.iov_base = md_buf;
1223 4 : bdev_io->internal.bounce_buf.md_iov.iov_len = len;
1224 : /* set bounce md_buf */
1225 4 : bdev_io->u.bdev.md_buf = md_buf;
1226 :
1227 4 : bdev_io_pull_md_buf(bdev_io);
1228 4 : }
1229 :
1230 : static void
1231 42 : _bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io)
1232 : {
1233 42 : struct spdk_bdev *bdev = bdev_io->bdev;
1234 : uint64_t md_len;
1235 : void *buf;
1236 :
1237 42 : if (spdk_bdev_is_md_separate(bdev)) {
1238 7 : assert(!bdev_io_use_accel_sequence(bdev_io));
1239 :
1240 7 : buf = (char *)bdev_io->u.bdev.iovs[0].iov_base + bdev_io->u.bdev.iovs[0].iov_len;
1241 7 : md_len = bdev_io->u.bdev.num_blocks * bdev->md_len;
1242 :
1243 7 : assert(((uintptr_t)buf & (spdk_bdev_get_buf_align(bdev) - 1)) == 0);
1244 :
1245 7 : if (bdev_io->u.bdev.md_buf != NULL) {
1246 4 : _bdev_io_pull_bounce_md_buf(bdev_io, buf, md_len);
1247 4 : return;
1248 : } else {
1249 3 : spdk_bdev_io_set_md_buf(bdev_io, buf, md_len);
1250 : }
1251 3 : }
1252 :
1253 38 : bdev_io_get_buf_complete(bdev_io, true);
1254 42 : }
1255 :
1256 : static inline void
1257 26 : bdev_io_pull_data_done(struct spdk_bdev_io *bdev_io, int rc)
1258 : {
1259 26 : if (rc) {
1260 0 : SPDK_ERRLOG("Failed to get data buffer\n");
1261 0 : assert(bdev_io->internal.data_transfer_cpl);
1262 0 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1263 0 : return;
1264 : }
1265 :
1266 26 : _bdev_io_set_md_buf(bdev_io);
1267 26 : }
1268 :
1269 : static void
1270 2 : bdev_io_pull_data_done_and_track(void *ctx, int status)
1271 : {
1272 2 : struct spdk_bdev_io *bdev_io = ctx;
1273 2 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1274 :
1275 2 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1276 2 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1277 :
1278 2 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1279 0 : bdev_ch_retry_io(ch);
1280 0 : }
1281 :
1282 2 : bdev_io_pull_data_done(bdev_io, status);
1283 2 : }
1284 :
1285 : static void
1286 27 : bdev_io_pull_data(struct spdk_bdev_io *bdev_io)
1287 : {
1288 27 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1289 27 : int rc = 0;
1290 :
1291 : /* If we need to exec an accel sequence or the IO uses a memory domain buffer and has a
1292 : * sequence, append a copy operation making accel change the src/dst buffers of the previous
1293 : * operation */
1294 27 : if (bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io) ||
1295 27 : (bdev_io_use_accel_sequence(bdev_io) && bdev_io_use_memory_domain(bdev_io))) {
1296 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
1297 0 : assert(bdev_io_use_accel_sequence(bdev_io));
1298 0 : assert(bdev_io->internal.f.has_bounce_buf);
1299 0 : rc = spdk_accel_append_copy(&bdev_io->internal.accel_sequence, ch->accel_channel,
1300 0 : bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
1301 : NULL, NULL,
1302 0 : bdev_io->internal.bounce_buf.orig_iovs,
1303 0 : bdev_io->internal.bounce_buf.orig_iovcnt,
1304 0 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
1305 0 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
1306 : NULL, NULL);
1307 0 : } else {
1308 : /* We need to reverse the src/dst for reads */
1309 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
1310 0 : assert(bdev_io_use_accel_sequence(bdev_io));
1311 0 : assert(bdev_io->internal.f.has_bounce_buf);
1312 0 : rc = spdk_accel_append_copy(&bdev_io->internal.accel_sequence, ch->accel_channel,
1313 0 : bdev_io->internal.bounce_buf.orig_iovs,
1314 0 : bdev_io->internal.bounce_buf.orig_iovcnt,
1315 0 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
1316 0 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
1317 0 : bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
1318 : NULL, NULL, NULL, NULL);
1319 : }
1320 :
1321 0 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
1322 0 : SPDK_ERRLOG("Failed to append copy to accel sequence: %p\n",
1323 : bdev_io->internal.accel_sequence);
1324 0 : }
1325 27 : } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
1326 : /* if this is write path, copy data from original buffer to bounce buffer */
1327 17 : if (bdev_io_use_memory_domain(bdev_io)) {
1328 3 : assert(bdev_io->internal.f.has_bounce_buf);
1329 3 : TAILQ_INSERT_TAIL(&ch->io_memory_domain, bdev_io, internal.link);
1330 3 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1331 6 : rc = spdk_memory_domain_pull_data(bdev_io->internal.memory_domain,
1332 3 : bdev_io->internal.memory_domain_ctx,
1333 3 : bdev_io->internal.bounce_buf.orig_iovs,
1334 3 : (uint32_t)bdev_io->internal.bounce_buf.orig_iovcnt,
1335 3 : bdev_io->u.bdev.iovs, 1,
1336 : bdev_io_pull_data_done_and_track,
1337 3 : bdev_io);
1338 3 : if (rc == 0) {
1339 : /* Continue to submit IO in completion callback */
1340 2 : return;
1341 : }
1342 1 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1343 1 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1344 1 : if (rc != -ENOMEM) {
1345 0 : SPDK_ERRLOG("Failed to pull data from memory domain %s\n",
1346 : spdk_memory_domain_get_dma_device_id(
1347 : bdev_io->internal.memory_domain));
1348 0 : }
1349 1 : } else {
1350 14 : assert(bdev_io->u.bdev.iovcnt == 1);
1351 14 : assert(bdev_io->internal.f.has_bounce_buf);
1352 28 : spdk_copy_iovs_to_buf(bdev_io->u.bdev.iovs[0].iov_base,
1353 14 : bdev_io->u.bdev.iovs[0].iov_len,
1354 14 : bdev_io->internal.bounce_buf.orig_iovs,
1355 14 : bdev_io->internal.bounce_buf.orig_iovcnt);
1356 : }
1357 15 : }
1358 :
1359 25 : if (spdk_unlikely(rc == -ENOMEM)) {
1360 1 : bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL);
1361 1 : } else {
1362 24 : bdev_io_pull_data_done(bdev_io, rc);
1363 : }
1364 27 : }
1365 :
1366 : static void
1367 26 : _bdev_io_pull_bounce_data_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len,
1368 : bdev_copy_bounce_buffer_cpl cpl_cb)
1369 : {
1370 26 : struct spdk_bdev_shared_resource *shared_resource = bdev_io->internal.ch->shared_resource;
1371 :
1372 26 : assert(bdev_io->internal.f.has_bounce_buf == false);
1373 :
1374 26 : bdev_io->internal.data_transfer_cpl = cpl_cb;
1375 26 : bdev_io->internal.f.has_bounce_buf = true;
1376 : /* save original iovec */
1377 26 : bdev_io->internal.bounce_buf.orig_iovs = bdev_io->u.bdev.iovs;
1378 26 : bdev_io->internal.bounce_buf.orig_iovcnt = bdev_io->u.bdev.iovcnt;
1379 : /* zero the other data members */
1380 26 : bdev_io->internal.bounce_buf.iov.iov_base = NULL;
1381 26 : bdev_io->internal.bounce_buf.md_iov.iov_base = NULL;
1382 26 : bdev_io->internal.bounce_buf.orig_md_iov.iov_base = NULL;
1383 : /* set bounce iov */
1384 26 : bdev_io->u.bdev.iovs = &bdev_io->internal.bounce_buf.iov;
1385 26 : bdev_io->u.bdev.iovcnt = 1;
1386 : /* set bounce buffer for this operation */
1387 26 : bdev_io->u.bdev.iovs[0].iov_base = buf;
1388 26 : bdev_io->u.bdev.iovs[0].iov_len = len;
1389 : /* Now we use 1 iov, the split condition could have been changed */
1390 26 : bdev_io->internal.f.split = bdev_io_should_split(bdev_io);
1391 :
1392 26 : if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) {
1393 0 : bdev_queue_nomem_io_tail(shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL);
1394 0 : } else {
1395 26 : bdev_io_pull_data(bdev_io);
1396 : }
1397 26 : }
1398 :
1399 : static void
1400 42 : _bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len)
1401 : {
1402 42 : struct spdk_bdev *bdev = bdev_io->bdev;
1403 : bool buf_allocated;
1404 : uint64_t alignment;
1405 : void *aligned_buf;
1406 :
1407 42 : bdev_io->internal.buf.ptr = buf;
1408 42 : bdev_io->internal.f.has_buf = true;
1409 :
1410 42 : if (spdk_unlikely(bdev_io->internal.get_aux_buf_cb != NULL)) {
1411 0 : bdev_io_get_buf_complete(bdev_io, true);
1412 0 : return;
1413 : }
1414 :
1415 42 : alignment = spdk_bdev_get_buf_align(bdev);
1416 42 : buf_allocated = _is_buf_allocated(bdev_io->u.bdev.iovs);
1417 42 : aligned_buf = (void *)(((uintptr_t)buf + (alignment - 1)) & ~(alignment - 1));
1418 :
1419 42 : if (buf_allocated) {
1420 26 : _bdev_io_pull_bounce_data_buf(bdev_io, aligned_buf, len, _bdev_io_pull_buffer_cpl);
1421 : /* Continue in completion callback */
1422 26 : return;
1423 : } else {
1424 16 : spdk_bdev_io_set_buf(bdev_io, aligned_buf, len);
1425 : }
1426 :
1427 16 : _bdev_io_set_md_buf(bdev_io);
1428 42 : }
1429 :
1430 : static inline uint64_t
1431 84 : bdev_io_get_max_buf_len(struct spdk_bdev_io *bdev_io, uint64_t len)
1432 : {
1433 84 : struct spdk_bdev *bdev = bdev_io->bdev;
1434 : uint64_t md_len, alignment;
1435 :
1436 84 : md_len = spdk_bdev_is_md_separate(bdev) ? bdev_io->u.bdev.num_blocks * bdev->md_len : 0;
1437 :
1438 : /* 1 byte alignment needs 0 byte of extra space, 64 bytes alignment needs 63 bytes of extra space, etc. */
1439 84 : alignment = spdk_bdev_get_buf_align(bdev) - 1;
1440 :
1441 84 : return len + alignment + md_len;
1442 : }
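/*
 * Worked example for the calculation above (illustrative values): an 8-block
 * I/O of 512-byte blocks (len == 4096) on a bdev with separate 8-byte
 * metadata per block (md_len == 64) and a 512-byte buffer alignment
 * (alignment == 511 after the -1) needs 4096 + 511 + 64 == 4671 bytes from
 * the iobuf pool.
 */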
1443 :
1444 : static void
1445 42 : _bdev_io_put_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t buf_len)
1446 : {
1447 : struct spdk_bdev_mgmt_channel *ch;
1448 :
1449 42 : ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
1450 42 : spdk_iobuf_put(&ch->iobuf, buf, bdev_io_get_max_buf_len(bdev_io, buf_len));
1451 42 : }
1452 :
1453 : static void
1454 42 : bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
1455 : {
1456 42 : assert(bdev_io->internal.f.has_buf);
1457 42 : _bdev_io_put_buf(bdev_io, bdev_io->internal.buf.ptr, bdev_io->internal.buf.len);
1458 42 : bdev_io->internal.buf.ptr = NULL;
1459 42 : bdev_io->internal.f.has_buf = false;
1460 42 : }
1461 :
1462 3 : SPDK_LOG_DEPRECATION_REGISTER(spdk_bdev_io_put_aux_buf,
1463 : "spdk_bdev_io_put_aux_buf is deprecated", "v25.01", 0);
1464 :
1465 : void
1466 0 : spdk_bdev_io_put_aux_buf(struct spdk_bdev_io *bdev_io, void *buf)
1467 : {
1468 0 : uint64_t len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
1469 :
1470 0 : SPDK_LOG_DEPRECATED(spdk_bdev_io_put_aux_buf);
1471 :
1472 0 : assert(buf != NULL);
1473 0 : _bdev_io_put_buf(bdev_io, buf, len);
1474 0 : }
1475 :
1476 : static inline void
1477 548 : bdev_submit_request(struct spdk_bdev *bdev, struct spdk_io_channel *ioch,
1478 : struct spdk_bdev_io *bdev_io)
1479 : {
1480 : /* After a request is submitted to a bdev module, the ownership of an accel sequence
1481 : * associated with that bdev_io is transferred to the bdev module. So, clear the internal
1482 : * sequence pointer to make sure we won't touch it anymore. */
1483 1014 : if ((bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE ||
1484 548 : bdev_io->type == SPDK_BDEV_IO_TYPE_READ) && bdev_io->u.bdev.accel_sequence != NULL) {
1485 0 : assert(!bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io));
1486 0 : bdev_io->internal.f.has_accel_sequence = false;
1487 0 : }
1488 :
1489 548 : bdev->fn_table->submit_request(ioch, bdev_io);
1490 548 : }
1491 :
1492 : static inline void
1493 10 : bdev_ch_resubmit_io(struct spdk_bdev_shared_resource *shared_resource, struct spdk_bdev_io *bdev_io)
1494 : {
1495 10 : struct spdk_bdev *bdev = bdev_io->bdev;
1496 :
1497 10 : bdev_io_increment_outstanding(bdev_io->internal.ch, shared_resource);
1498 10 : bdev_io->internal.error.nvme.cdw0 = 0;
1499 10 : bdev_io->num_retries++;
1500 10 : bdev_submit_request(bdev, spdk_bdev_io_get_io_channel(bdev_io), bdev_io);
1501 10 : }
1502 :
1503 : static void
1504 63 : bdev_shared_ch_retry_io(struct spdk_bdev_shared_resource *shared_resource)
1505 : {
1506 : struct spdk_bdev_io *bdev_io;
1507 :
1508 63 : if (shared_resource->io_outstanding > shared_resource->nomem_threshold) {
1509 : /*
1510 : * Allow some more I/O to complete before retrying the nomem_io queue.
1511 : * Some drivers (such as nvme) cannot immediately take a new I/O in
1512 : * the context of a completion, because the resources for the I/O are
1513 : * not released until control returns to the bdev poller. Also, we
1514 : * may require several small I/O to complete before a larger I/O
1515 : * (that requires splitting) can be submitted.
1516 : */
1517 58 : return;
1518 : }
1519 :
1520 16 : while (!TAILQ_EMPTY(&shared_resource->nomem_io)) {
1521 12 : bdev_io = TAILQ_FIRST(&shared_resource->nomem_io);
1522 12 : TAILQ_REMOVE(&shared_resource->nomem_io, bdev_io, internal.link);
1523 :
1524 12 : switch (bdev_io->internal.retry_state) {
1525 : case BDEV_IO_RETRY_STATE_SUBMIT:
1526 10 : bdev_ch_resubmit_io(shared_resource, bdev_io);
1527 10 : break;
1528 : case BDEV_IO_RETRY_STATE_PULL:
1529 1 : bdev_io_pull_data(bdev_io);
1530 1 : break;
1531 : case BDEV_IO_RETRY_STATE_PULL_MD:
1532 0 : bdev_io_pull_md_buf(bdev_io);
1533 0 : break;
1534 : case BDEV_IO_RETRY_STATE_PUSH:
1535 1 : bdev_io_push_bounce_data(bdev_io);
1536 1 : break;
1537 : case BDEV_IO_RETRY_STATE_PUSH_MD:
1538 0 : bdev_io_push_bounce_md_buf(bdev_io);
1539 0 : break;
1540 : default:
1541 0 : assert(0 && "invalid retry state");
1542 : break;
1543 : }
1544 :
1545 12 : if (bdev_io == TAILQ_FIRST(&shared_resource->nomem_io)) {
1546 : /* This IO completed again with NOMEM status, so break the loop and
1547 : * don't try anymore. Note that a bdev_io that fails with NOMEM
1548 : * always gets requeued at the front of the list, to maintain
1549 : * ordering.
1550 : */
1551 1 : break;
1552 : }
1553 : }
1554 63 : }
1555 :
1556 : static void
1557 63 : bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch)
1558 : {
1559 63 : bdev_shared_ch_retry_io(bdev_ch->shared_resource);
1560 63 : }
1561 :
1562 : static int
1563 0 : bdev_no_mem_poller(void *ctx)
1564 : {
1565 0 : struct spdk_bdev_shared_resource *shared_resource = ctx;
1566 :
1567 0 : spdk_poller_unregister(&shared_resource->nomem_poller);
1568 :
1569 0 : if (!TAILQ_EMPTY(&shared_resource->nomem_io)) {
1570 0 : bdev_shared_ch_retry_io(shared_resource);
1571 0 : }
1572 : /* the retry cb may re-register the poller so double check */
1573 0 : if (!TAILQ_EMPTY(&shared_resource->nomem_io) &&
1574 0 : shared_resource->io_outstanding == 0 && shared_resource->nomem_poller == NULL) {
1575 : /* No IOs were submitted, try again */
1576 0 : shared_resource->nomem_poller = SPDK_POLLER_REGISTER(bdev_no_mem_poller, shared_resource,
1577 : SPDK_BDEV_IO_POLL_INTERVAL_IN_MSEC * 10);
1578 0 : }
1579 :
1580 0 : return SPDK_POLLER_BUSY;
1581 : }
1582 :
1583 : static inline bool
1584 556 : _bdev_io_handle_no_mem(struct spdk_bdev_io *bdev_io, enum bdev_io_retry_state state)
1585 : {
1586 556 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
1587 556 : struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
1588 :
1589 556 : if (spdk_unlikely(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM)) {
1590 5 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
1591 5 : bdev_queue_nomem_io_head(shared_resource, bdev_io, state);
1592 :
1593 5 : if (shared_resource->io_outstanding == 0 && !shared_resource->nomem_poller) {
1594 : /* Special case when we have nomem IOs and no outstanding IOs which completions
1595 : * could trigger retry of queued IOs
1596 : * Any IOs submitted may trigger retry of queued IOs. This poller handles a case when no
1597 : * new IOs submitted, e.g. qd==1 */
1598 0 : shared_resource->nomem_poller = SPDK_POLLER_REGISTER(bdev_no_mem_poller, shared_resource,
1599 : SPDK_BDEV_IO_POLL_INTERVAL_IN_MSEC * 10);
1600 0 : }
1601 : /* If bdev module completed an I/O that has an accel sequence with NOMEM status, the
1602 : * ownership of that sequence is transferred back to the bdev layer, so we need to
1603 : * restore internal.accel_sequence to make sure that the sequence is handled
1604 : * correctly in case the I/O is later aborted. */
1605 5 : if ((bdev_io->type == SPDK_BDEV_IO_TYPE_READ ||
1606 5 : bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) && bdev_io->u.bdev.accel_sequence) {
1607 0 : assert(!bdev_io_use_accel_sequence(bdev_io));
1608 0 : bdev_io->internal.f.has_accel_sequence = true;
1609 0 : bdev_io->internal.accel_sequence = bdev_io->u.bdev.accel_sequence;
1610 0 : }
1611 :
1612 5 : return true;
1613 : }
1614 :
1615 551 : if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) {
1616 63 : bdev_ch_retry_io(bdev_ch);
1617 63 : }
1618 :
1619 551 : return false;
1620 556 : }
1621 :
1622 : static void
1623 26 : _bdev_io_complete_push_bounce_done(void *ctx, int rc)
1624 : {
1625 26 : struct spdk_bdev_io *bdev_io = ctx;
1626 26 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1627 :
1628 26 : if (rc) {
1629 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
1630 0 : }
1631 : /* We want to free the bounce buffer here since we know we're done with it (as opposed
1632 : * to waiting for the conditional free of internal.buf.ptr in spdk_bdev_free_io()).
1633 : */
1634 26 : bdev_io_put_buf(bdev_io);
1635 :
1636 26 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1637 0 : bdev_ch_retry_io(ch);
1638 0 : }
1639 :
1640 : /* Continue with IO completion flow */
1641 26 : bdev_io_complete(bdev_io);
1642 26 : }
1643 :
1644 : static void
1645 2 : bdev_io_push_bounce_md_buf_done(void *ctx, int rc)
1646 : {
1647 2 : struct spdk_bdev_io *bdev_io = ctx;
1648 2 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1649 :
1650 2 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1651 2 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1652 2 : bdev_io->internal.f.has_bounce_buf = false;
1653 :
1654 2 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1655 0 : bdev_ch_retry_io(ch);
1656 0 : }
1657 :
1658 2 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1659 2 : }
1660 :
1661 : static inline void
1662 26 : bdev_io_push_bounce_md_buf(struct spdk_bdev_io *bdev_io)
1663 : {
1664 26 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1665 26 : int rc = 0;
1666 :
1667 26 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
1668 26 : assert(bdev_io->internal.f.has_bounce_buf);
1669 :
1670 : /* do the same for metadata buffer */
1671 26 : if (spdk_unlikely(bdev_io->internal.bounce_buf.orig_md_iov.iov_base != NULL)) {
1672 4 : assert(spdk_bdev_is_md_separate(bdev_io->bdev));
1673 :
1674 4 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
1675 2 : if (bdev_io_use_memory_domain(bdev_io)) {
1676 2 : TAILQ_INSERT_TAIL(&ch->io_memory_domain, bdev_io, internal.link);
1677 2 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1678 : /* If memory domain is used then we need to call async push function */
1679 4 : rc = spdk_memory_domain_push_data(bdev_io->internal.memory_domain,
1680 2 : bdev_io->internal.memory_domain_ctx,
1681 2 : &bdev_io->internal.bounce_buf.orig_md_iov,
1682 2 : (uint32_t)bdev_io->internal.bounce_buf.orig_iovcnt,
1683 2 : &bdev_io->internal.bounce_buf.md_iov, 1,
1684 : bdev_io_push_bounce_md_buf_done,
1685 2 : bdev_io);
1686 2 : if (rc == 0) {
1687 : /* Continue IO completion in async callback */
1688 2 : return;
1689 : }
1690 0 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1691 0 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1692 0 : if (rc != -ENOMEM) {
1693 0 : SPDK_ERRLOG("Failed to push md to memory domain %s\n",
1694 : spdk_memory_domain_get_dma_device_id(
1695 : bdev_io->internal.memory_domain));
1696 0 : }
1697 0 : } else {
1698 0 : memcpy(bdev_io->internal.bounce_buf.orig_md_iov.iov_base, bdev_io->u.bdev.md_buf,
1699 0 : bdev_io->internal.bounce_buf.orig_md_iov.iov_len);
1700 : }
1701 0 : }
1702 2 : }
1703 :
1704 24 : if (spdk_unlikely(rc == -ENOMEM)) {
1705 0 : bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PUSH_MD);
1706 0 : } else {
1707 24 : assert(bdev_io->internal.data_transfer_cpl);
1708 24 : bdev_io->internal.f.has_bounce_buf = false;
1709 24 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1710 : }
1711 26 : }
1712 :
1713 : static inline void
1714 26 : bdev_io_push_bounce_data_done(struct spdk_bdev_io *bdev_io, int rc)
1715 : {
1716 26 : assert(bdev_io->internal.data_transfer_cpl);
1717 26 : if (rc) {
1718 0 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1719 0 : return;
1720 : }
1721 :
1722 : /* set original buffer for this io */
1723 26 : bdev_io->u.bdev.iovcnt = bdev_io->internal.bounce_buf.orig_iovcnt;
1724 26 : bdev_io->u.bdev.iovs = bdev_io->internal.bounce_buf.orig_iovs;
1725 :
1726 : /* We don't set bdev_io->internal.f.has_bounce_buf to false here because
1727 : * we still need to clear the md buf */
1728 :
1729 26 : bdev_io_push_bounce_md_buf(bdev_io);
1730 26 : }
1731 :
1732 : static void
1733 2 : bdev_io_push_bounce_data_done_and_track(void *ctx, int status)
1734 : {
1735 2 : struct spdk_bdev_io *bdev_io = ctx;
1736 2 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1737 :
1738 2 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1739 2 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1740 :
1741 2 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1742 0 : bdev_ch_retry_io(ch);
1743 0 : }
1744 :
1745 2 : bdev_io_push_bounce_data_done(bdev_io, status);
1746 2 : }
1747 :
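     : /*
     :  * Completion path for I/Os that were served through a bounce buffer: for reads, the
     :  * data (and, separately, any metadata) that the module wrote into the bounce buffer
     :  * must be pushed back into the caller's original iovs, either with a plain memcpy or,
     :  * when the destination is owned by a memory domain, with an asynchronous
     :  * spdk_memory_domain_push_data() call. An -ENOMEM from the push re-queues the I/O in
     :  * BDEV_IO_RETRY_STATE_PUSH / BDEV_IO_RETRY_STATE_PUSH_MD so it is retried later.
     :  */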
1748 : static inline void
1749 27 : bdev_io_push_bounce_data(struct spdk_bdev_io *bdev_io)
1750 : {
1751 27 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1752 27 : int rc = 0;
1753 :
1754 27 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
1755 27 : assert(!bdev_io_use_accel_sequence(bdev_io));
1756 27 : assert(bdev_io->internal.f.has_bounce_buf);
1757 :
 1758 :         /* if this is the read path, copy data from the bounce buffer to the original buffer */
1759 27 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
1760 11 : if (bdev_io_use_memory_domain(bdev_io)) {
1761 3 : TAILQ_INSERT_TAIL(&ch->io_memory_domain, bdev_io, internal.link);
1762 3 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1763 : /* If memory domain is used then we need to call async push function */
1764 6 : rc = spdk_memory_domain_push_data(bdev_io->internal.memory_domain,
1765 3 : bdev_io->internal.memory_domain_ctx,
1766 3 : bdev_io->internal.bounce_buf.orig_iovs,
1767 3 : (uint32_t)bdev_io->internal.bounce_buf.orig_iovcnt,
1768 3 : &bdev_io->internal.bounce_buf.iov, 1,
1769 : bdev_io_push_bounce_data_done_and_track,
1770 3 : bdev_io);
1771 3 : if (rc == 0) {
1772 : /* Continue IO completion in async callback */
1773 2 : return;
1774 : }
1775 :
1776 1 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1777 1 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1778 1 : if (rc != -ENOMEM) {
1779 0 : SPDK_ERRLOG("Failed to push data to memory domain %s\n",
1780 : spdk_memory_domain_get_dma_device_id(
1781 : bdev_io->internal.memory_domain));
1782 0 : }
1783 1 : } else {
1784 16 : spdk_copy_buf_to_iovs(bdev_io->internal.bounce_buf.orig_iovs,
1785 8 : bdev_io->internal.bounce_buf.orig_iovcnt,
1786 8 : bdev_io->internal.bounce_buf.iov.iov_base,
1787 8 : bdev_io->internal.bounce_buf.iov.iov_len);
1788 : }
1789 9 : }
1790 :
1791 25 : if (spdk_unlikely(rc == -ENOMEM)) {
1792 1 : bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PUSH);
1793 1 : } else {
1794 24 : bdev_io_push_bounce_data_done(bdev_io, rc);
1795 : }
1796 27 : }
1797 :
1798 : static inline void
1799 26 : _bdev_io_push_bounce_data_buffer(struct spdk_bdev_io *bdev_io, bdev_copy_bounce_buffer_cpl cpl_cb)
1800 : {
1801 26 : bdev_io->internal.data_transfer_cpl = cpl_cb;
1802 26 : bdev_io_push_bounce_data(bdev_io);
1803 26 : }
1804 :
1805 : static void
1806 0 : bdev_io_get_iobuf_cb(struct spdk_iobuf_entry *iobuf, void *buf)
1807 : {
1808 : struct spdk_bdev_io *bdev_io;
1809 :
1810 0 : bdev_io = SPDK_CONTAINEROF(iobuf, struct spdk_bdev_io, internal.iobuf);
1811 0 : _bdev_io_set_buf(bdev_io, buf, bdev_io->internal.buf.len);
1812 0 : }
1813 :
1814 : static void
1815 42 : bdev_io_get_buf(struct spdk_bdev_io *bdev_io, uint64_t len)
1816 : {
1817 : struct spdk_bdev_mgmt_channel *mgmt_ch;
1818 : uint64_t max_len;
1819 : void *buf;
1820 :
1821 42 : assert(spdk_bdev_io_get_thread(bdev_io) == spdk_get_thread());
1822 42 : mgmt_ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
1823 42 : max_len = bdev_io_get_max_buf_len(bdev_io, len);
1824 :
1825 42 : if (spdk_unlikely(max_len > mgmt_ch->iobuf.cache[0].large.bufsize)) {
1826 0 : SPDK_ERRLOG("Length %" PRIu64 " is larger than allowed\n", max_len);
1827 0 : bdev_io_get_buf_complete(bdev_io, false);
1828 0 : return;
1829 : }
1830 :
1831 42 : bdev_io->internal.buf.len = len;
1832 42 : buf = spdk_iobuf_get(&mgmt_ch->iobuf, max_len, &bdev_io->internal.iobuf,
1833 : bdev_io_get_iobuf_cb);
1834 42 : if (buf != NULL) {
1835 42 : _bdev_io_set_buf(bdev_io, buf, len);
1836 42 : }
1837 42 : }
1838 :
1839 : void
1840 56 : spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len)
1841 : {
1842 56 : struct spdk_bdev *bdev = bdev_io->bdev;
1843 : uint64_t alignment;
1844 :
1845 56 : assert(cb != NULL);
1846 56 : bdev_io->internal.get_buf_cb = cb;
1847 :
1848 56 : alignment = spdk_bdev_get_buf_align(bdev);
1849 :
1850 56 : if (_is_buf_allocated(bdev_io->u.bdev.iovs) &&
1851 40 : _are_iovs_aligned(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, alignment)) {
1852 : /* Buffer already present and aligned */
1853 18 : cb(spdk_bdev_io_get_io_channel(bdev_io), bdev_io, true);
1854 18 : return;
1855 : }
1856 :
1857 38 : bdev_io_get_buf(bdev_io, len);
1858 56 : }
1859 :
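     : /*
     :  * Typical usage of spdk_bdev_io_get_buf() above (illustrative sketch, not part of this
     :  * file): a bdev module defers read-buffer allocation until data is actually needed by
     :  * calling it from its submit_request handler; read_get_buf_cb is a hypothetical name.
     :  *
     :  *     static void
     :  *     read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
     :  *     {
     :  *             if (!success) {
     :  *                     spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     :  *                     return;
     :  *             }
     :  *             ... issue the read into bdev_io->u.bdev.iovs ...
     :  *     }
     :  *
     :  *     spdk_bdev_io_get_buf(bdev_io, read_get_buf_cb,
     :  *                          bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
     :  */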
1860 : static void
1861 4 : _bdev_memory_domain_get_io_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1862 : bool success)
1863 : {
1864 4 : if (!success) {
1865 0 : SPDK_ERRLOG("Failed to get data buffer, completing IO\n");
1866 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
1867 0 : bdev_io_complete_unsubmitted(bdev_io);
1868 0 : return;
1869 : }
1870 :
1871 4 : if (bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io)) {
1872 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
1873 0 : bdev_io_exec_sequence(bdev_io, bdev_io_submit_sequence_cb);
1874 0 : return;
1875 : }
1876 : /* For reads we'll execute the sequence after the data is read, so, for now, only
1877 : * clear out accel_sequence pointer and submit the IO */
1878 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
1879 0 : bdev_io->u.bdev.accel_sequence = NULL;
1880 0 : }
1881 :
1882 4 : bdev_io_submit(bdev_io);
1883 4 : }
1884 :
1885 : static void
1886 4 : _bdev_memory_domain_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb,
1887 : uint64_t len)
1888 : {
1889 4 : assert(cb != NULL);
1890 4 : bdev_io->internal.get_buf_cb = cb;
1891 :
1892 4 : bdev_io_get_buf(bdev_io, len);
1893 4 : }
1894 :
1895 :
1896 3 : SPDK_LOG_DEPRECATION_REGISTER(spdk_bdev_io_get_aux_buf,
1897 : "spdk_bdev_io_get_aux_buf is deprecated", "v25.01", 0);
1898 :
1899 : void
1900 0 : spdk_bdev_io_get_aux_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_aux_buf_cb cb)
1901 : {
1902 0 : uint64_t len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
1903 :
1904 0 : SPDK_LOG_DEPRECATED(spdk_bdev_io_get_aux_buf);
1905 :
1906 0 : assert(cb != NULL);
1907 0 : assert(bdev_io->internal.get_aux_buf_cb == NULL);
1908 0 : bdev_io->internal.get_aux_buf_cb = cb;
1909 0 : bdev_io_get_buf(bdev_io, len);
1910 0 : }
1911 :
1912 : static int
1913 65 : bdev_module_get_max_ctx_size(void)
1914 : {
1915 : struct spdk_bdev_module *bdev_module;
1916 65 : int max_bdev_module_size = 0;
1917 :
1918 254 : TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
1919 189 : if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
1920 64 : max_bdev_module_size = bdev_module->get_ctx_size();
1921 64 : }
1922 189 : }
1923 :
1924 65 : return max_bdev_module_size;
1925 : }
1926 :
1927 : static void
1928 0 : bdev_enable_histogram_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
1929 : {
1930 0 : if (!bdev->internal.histogram_enabled) {
1931 0 : return;
1932 : }
1933 :
1934 0 : spdk_json_write_object_begin(w);
1935 0 : spdk_json_write_named_string(w, "method", "bdev_enable_histogram");
1936 :
1937 0 : spdk_json_write_named_object_begin(w, "params");
1938 0 : spdk_json_write_named_string(w, "name", bdev->name);
1939 :
1940 0 : spdk_json_write_named_bool(w, "enable", bdev->internal.histogram_enabled);
1941 :
1942 0 : if (bdev->internal.histogram_io_type) {
1943 0 : spdk_json_write_named_string(w, "opc",
1944 0 : spdk_bdev_get_io_type_name(bdev->internal.histogram_io_type));
1945 0 : }
1946 :
1947 0 : spdk_json_write_object_end(w);
1948 :
1949 0 : spdk_json_write_object_end(w);
1950 0 : }
1951 :
1952 : static void
1953 0 : bdev_qos_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
1954 : {
1955 : int i;
1956 0 : struct spdk_bdev_qos *qos = bdev->internal.qos;
1957 : uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
1958 :
1959 0 : if (!qos) {
1960 0 : return;
1961 : }
1962 :
1963 0 : spdk_bdev_get_qos_rate_limits(bdev, limits);
1964 :
1965 0 : spdk_json_write_object_begin(w);
1966 0 : spdk_json_write_named_string(w, "method", "bdev_set_qos_limit");
1967 :
1968 0 : spdk_json_write_named_object_begin(w, "params");
1969 0 : spdk_json_write_named_string(w, "name", bdev->name);
1970 0 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
1971 0 : if (limits[i] > 0) {
1972 0 : spdk_json_write_named_uint64(w, qos_rpc_type[i], limits[i]);
1973 0 : }
1974 0 : }
1975 0 : spdk_json_write_object_end(w);
1976 :
1977 0 : spdk_json_write_object_end(w);
1978 0 : }
1979 :
1980 : void
1981 0 : spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w)
1982 : {
1983 : struct spdk_bdev_module *bdev_module;
1984 : struct spdk_bdev *bdev;
1985 :
1986 0 : assert(w != NULL);
1987 :
1988 0 : spdk_json_write_array_begin(w);
1989 :
1990 0 : spdk_json_write_object_begin(w);
1991 0 : spdk_json_write_named_string(w, "method", "bdev_set_options");
1992 0 : spdk_json_write_named_object_begin(w, "params");
1993 0 : spdk_json_write_named_uint32(w, "bdev_io_pool_size", g_bdev_opts.bdev_io_pool_size);
1994 0 : spdk_json_write_named_uint32(w, "bdev_io_cache_size", g_bdev_opts.bdev_io_cache_size);
1995 0 : spdk_json_write_named_bool(w, "bdev_auto_examine", g_bdev_opts.bdev_auto_examine);
1996 0 : spdk_json_write_named_uint32(w, "iobuf_small_cache_size", g_bdev_opts.iobuf_small_cache_size);
1997 0 : spdk_json_write_named_uint32(w, "iobuf_large_cache_size", g_bdev_opts.iobuf_large_cache_size);
1998 0 : spdk_json_write_object_end(w);
1999 0 : spdk_json_write_object_end(w);
2000 :
2001 0 : bdev_examine_allowlist_config_json(w);
2002 :
2003 0 : TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
2004 0 : if (bdev_module->config_json) {
2005 0 : bdev_module->config_json(w);
2006 0 : }
2007 0 : }
2008 :
2009 0 : spdk_spin_lock(&g_bdev_mgr.spinlock);
2010 :
2011 0 : TAILQ_FOREACH(bdev, &g_bdev_mgr.bdevs, internal.link) {
2012 0 : if (bdev->fn_table->write_config_json) {
2013 0 : bdev->fn_table->write_config_json(bdev, w);
2014 0 : }
2015 :
2016 0 : bdev_qos_config_json(bdev, w);
2017 0 : bdev_enable_histogram_config_json(bdev, w);
2018 0 : }
2019 :
2020 0 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
2021 :
 2022 :         /* This has to be the last RPC in the array to make sure all bdevs have finished examine */
2023 0 : spdk_json_write_object_begin(w);
2024 0 : spdk_json_write_named_string(w, "method", "bdev_wait_for_examine");
2025 0 : spdk_json_write_object_end(w);
2026 :
2027 0 : spdk_json_write_array_end(w);
2028 0 : }
2029 :
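     : /*
     :  * The JSON-RPC array written above roughly has the following shape (values shown are
     :  * examples only):
     :  *
     :  *     [
     :  *       { "method": "bdev_set_options",
     :  *         "params": { "bdev_io_pool_size": 65535, "bdev_io_cache_size": 256,
     :  *                     "bdev_auto_examine": true, "iobuf_small_cache_size": 128,
     :  *                     "iobuf_large_cache_size": 16 } },
     :  *       ...per-module config, per-bdev config, QoS and histogram RPCs...
     :  *       { "method": "bdev_wait_for_examine" }
     :  *     ]
     :  */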
2030 : static void
2031 71 : bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
2032 : {
2033 71 : struct spdk_bdev_mgmt_channel *ch = ctx_buf;
2034 : struct spdk_bdev_io *bdev_io;
2035 :
2036 71 : spdk_iobuf_channel_fini(&ch->iobuf);
2037 :
2038 9969 : while (!STAILQ_EMPTY(&ch->per_thread_cache)) {
2039 9898 : bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
2040 9898 : STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link);
2041 9898 : ch->per_thread_cache_count--;
2042 9898 : spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
2043 : }
2044 :
2045 71 : assert(ch->per_thread_cache_count == 0);
2046 71 : }
2047 :
2048 : static int
2049 71 : bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
2050 : {
2051 71 : struct spdk_bdev_mgmt_channel *ch = ctx_buf;
2052 : struct spdk_bdev_io *bdev_io;
2053 : uint32_t i;
2054 : int rc;
2055 :
2056 142 : rc = spdk_iobuf_channel_init(&ch->iobuf, "bdev",
2057 71 : g_bdev_opts.iobuf_small_cache_size,
2058 71 : g_bdev_opts.iobuf_large_cache_size);
2059 71 : if (rc != 0) {
2060 0 : SPDK_ERRLOG("Failed to create iobuf channel: %s\n", spdk_strerror(-rc));
2061 0 : return -1;
2062 : }
2063 :
2064 71 : STAILQ_INIT(&ch->per_thread_cache);
2065 71 : ch->bdev_io_cache_size = g_bdev_opts.bdev_io_cache_size;
2066 :
2067 : /* Pre-populate bdev_io cache to ensure this thread cannot be starved. */
2068 71 : ch->per_thread_cache_count = 0;
2069 9969 : for (i = 0; i < ch->bdev_io_cache_size; i++) {
2070 9898 : bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
2071 9898 : if (bdev_io == NULL) {
2072 0 : SPDK_ERRLOG("You need to increase bdev_io_pool_size using bdev_set_options RPC.\n");
2073 0 : assert(false);
2074 : bdev_mgmt_channel_destroy(io_device, ctx_buf);
2075 : return -1;
2076 : }
2077 9898 : ch->per_thread_cache_count++;
2078 9898 : STAILQ_INSERT_HEAD(&ch->per_thread_cache, bdev_io, internal.buf_link);
2079 9898 : }
2080 :
2081 71 : TAILQ_INIT(&ch->shared_resources);
2082 71 : TAILQ_INIT(&ch->io_wait_queue);
2083 :
2084 71 : return 0;
2085 71 : }
2086 :
2087 : static void
2088 65 : bdev_init_complete(int rc)
2089 : {
2090 65 : spdk_bdev_init_cb cb_fn = g_init_cb_fn;
2091 65 : void *cb_arg = g_init_cb_arg;
2092 : struct spdk_bdev_module *m;
2093 :
2094 65 : g_bdev_mgr.init_complete = true;
2095 65 : g_init_cb_fn = NULL;
2096 65 : g_init_cb_arg = NULL;
2097 :
2098 : /*
2099 : * For modules that need to know when subsystem init is complete,
2100 : * inform them now.
2101 : */
2102 65 : if (rc == 0) {
2103 254 : TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
2104 189 : if (m->init_complete) {
2105 23 : m->init_complete();
2106 23 : }
2107 189 : }
2108 65 : }
2109 :
2110 65 : cb_fn(cb_arg, rc);
2111 65 : }
2112 :
2113 : static bool
2114 257 : bdev_module_all_actions_completed(void)
2115 : {
2116 : struct spdk_bdev_module *m;
2117 :
2118 1017 : TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
2119 760 : if (m->internal.action_in_progress > 0) {
2120 0 : return false;
2121 : }
2122 760 : }
2123 257 : return true;
2124 257 : }
2125 :
2126 : static void
2127 622 : bdev_module_action_complete(void)
2128 : {
2129 : /*
2130 : * Don't finish bdev subsystem initialization if
2131 : * module pre-initialization is still in progress, or
 2132 :          * the subsystem has already been initialized.
2133 : */
2134 622 : if (!g_bdev_mgr.module_init_complete || g_bdev_mgr.init_complete) {
2135 557 : return;
2136 : }
2137 :
2138 : /*
2139 : * Check all bdev modules for inits/examinations in progress. If any
2140 : * exist, return immediately since we cannot finish bdev subsystem
2141 : * initialization until all are completed.
2142 : */
2143 65 : if (!bdev_module_all_actions_completed()) {
2144 0 : return;
2145 : }
2146 :
2147 : /*
2148 : * Modules already finished initialization - now that all
2149 : * the bdev modules have finished their asynchronous I/O
2150 : * processing, the entire bdev layer can be marked as complete.
2151 : */
2152 65 : bdev_init_complete(0);
2153 622 : }
2154 :
2155 : static void
2156 557 : bdev_module_action_done(struct spdk_bdev_module *module)
2157 : {
2158 557 : spdk_spin_lock(&module->internal.spinlock);
2159 557 : assert(module->internal.action_in_progress > 0);
2160 557 : module->internal.action_in_progress--;
2161 557 : spdk_spin_unlock(&module->internal.spinlock);
2162 557 : bdev_module_action_complete();
2163 557 : }
2164 :
2165 : void
2166 65 : spdk_bdev_module_init_done(struct spdk_bdev_module *module)
2167 : {
2168 65 : assert(module->async_init);
2169 65 : bdev_module_action_done(module);
2170 65 : }
2171 :
2172 : void
2173 492 : spdk_bdev_module_examine_done(struct spdk_bdev_module *module)
2174 : {
2175 492 : bdev_module_action_done(module);
2176 492 : }
2177 :
2178 : /** The last initialized bdev module */
2179 : static struct spdk_bdev_module *g_resume_bdev_module = NULL;
2180 :
2181 : static void
2182 0 : bdev_init_failed(void *cb_arg)
2183 : {
2184 0 : struct spdk_bdev_module *module = cb_arg;
2185 :
2186 0 : spdk_spin_lock(&module->internal.spinlock);
2187 0 : assert(module->internal.action_in_progress > 0);
2188 0 : module->internal.action_in_progress--;
2189 0 : spdk_spin_unlock(&module->internal.spinlock);
2190 0 : bdev_init_complete(-1);
2191 0 : }
2192 :
2193 : static int
2194 65 : bdev_modules_init(void)
2195 : {
2196 : struct spdk_bdev_module *module;
2197 65 : int rc = 0;
2198 :
2199 254 : TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
2200 189 : g_resume_bdev_module = module;
2201 189 : if (module->async_init) {
2202 65 : spdk_spin_lock(&module->internal.spinlock);
2203 65 : module->internal.action_in_progress = 1;
2204 65 : spdk_spin_unlock(&module->internal.spinlock);
2205 65 : }
2206 189 : rc = module->module_init();
2207 189 : if (rc != 0) {
 2208 :                         /* Bump action_in_progress to prevent other modules from completing modules_init.
 2209 :                          * Send a message to defer application shutdown until resources are cleaned up. */
2210 0 : spdk_spin_lock(&module->internal.spinlock);
2211 0 : module->internal.action_in_progress = 1;
2212 0 : spdk_spin_unlock(&module->internal.spinlock);
2213 0 : spdk_thread_send_msg(spdk_get_thread(), bdev_init_failed, module);
2214 0 : return rc;
2215 : }
2216 189 : }
2217 :
2218 65 : g_resume_bdev_module = NULL;
2219 65 : return 0;
2220 65 : }
2221 :
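     : /*
     :  * Modules that set async_init complete part of their initialization asynchronously:
     :  * action_in_progress is bumped before module_init() is called, and the module later
     :  * calls spdk_bdev_module_init_done(), which decrements it and, once no module actions
     :  * remain in progress, lets bdev_module_action_complete() finish subsystem init.
     :  */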
2222 : void
2223 65 : spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg)
2224 : {
2225 65 : int rc = 0;
2226 : char mempool_name[32];
2227 :
2228 65 : assert(cb_fn != NULL);
2229 :
2230 65 : g_init_cb_fn = cb_fn;
2231 65 : g_init_cb_arg = cb_arg;
2232 :
2233 65 : spdk_notify_type_register("bdev_register");
2234 65 : spdk_notify_type_register("bdev_unregister");
2235 :
2236 65 : snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid());
2237 :
2238 65 : rc = spdk_iobuf_register_module("bdev");
2239 65 : if (rc != 0) {
2240 0 : SPDK_ERRLOG("could not register bdev iobuf module: %s\n", spdk_strerror(-rc));
2241 0 : bdev_init_complete(-1);
2242 0 : return;
2243 : }
2244 :
2245 130 : g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name,
2246 65 : g_bdev_opts.bdev_io_pool_size,
2247 65 : sizeof(struct spdk_bdev_io) +
2248 65 : bdev_module_get_max_ctx_size(),
2249 : 0,
2250 : SPDK_ENV_NUMA_ID_ANY);
2251 :
2252 65 : if (g_bdev_mgr.bdev_io_pool == NULL) {
2253 0 : SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
2254 0 : bdev_init_complete(-1);
2255 0 : return;
2256 : }
2257 :
2258 65 : g_bdev_mgr.zero_buffer = spdk_zmalloc(ZERO_BUFFER_SIZE, ZERO_BUFFER_SIZE,
2259 : NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
2260 65 : if (!g_bdev_mgr.zero_buffer) {
2261 0 : SPDK_ERRLOG("create bdev zero buffer failed\n");
2262 0 : bdev_init_complete(-1);
2263 0 : return;
2264 : }
2265 :
2266 : #ifdef SPDK_CONFIG_VTUNE
2267 : g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
2268 : #endif
2269 :
2270 65 : spdk_io_device_register(&g_bdev_mgr, bdev_mgmt_channel_create,
2271 : bdev_mgmt_channel_destroy,
2272 : sizeof(struct spdk_bdev_mgmt_channel),
2273 : "bdev_mgr");
2274 :
2275 65 : rc = bdev_modules_init();
2276 65 : g_bdev_mgr.module_init_complete = true;
2277 65 : if (rc != 0) {
2278 0 : SPDK_ERRLOG("bdev modules init failed\n");
2279 0 : return;
2280 : }
2281 :
2282 65 : bdev_module_action_complete();
2283 65 : }
2284 :
2285 : static void
2286 65 : bdev_mgr_unregister_cb(void *io_device)
2287 : {
2288 65 : spdk_bdev_fini_cb cb_fn = g_fini_cb_fn;
2289 :
2290 65 : if (g_bdev_mgr.bdev_io_pool) {
2291 65 : if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != g_bdev_opts.bdev_io_pool_size) {
2292 0 : SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
2293 : spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
2294 : g_bdev_opts.bdev_io_pool_size);
2295 0 : }
2296 :
2297 65 : spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
2298 65 : }
2299 :
2300 65 : spdk_free(g_bdev_mgr.zero_buffer);
2301 :
2302 65 : bdev_examine_allowlist_free();
2303 :
2304 65 : cb_fn(g_fini_cb_arg);
2305 65 : g_fini_cb_fn = NULL;
2306 65 : g_fini_cb_arg = NULL;
2307 65 : g_bdev_mgr.init_complete = false;
2308 65 : g_bdev_mgr.module_init_complete = false;
2309 65 : }
2310 :
2311 : static void
2312 65 : bdev_module_fini_iter(void *arg)
2313 : {
2314 : struct spdk_bdev_module *bdev_module;
2315 :
2316 : /* FIXME: Handling initialization failures is broken now,
2317 : * so we won't even try cleaning up after successfully
 2318 :          * initialized modules. If module_init_complete is false,
 2319 :          * just call bdev_mgr_unregister_cb().
2320 : */
2321 65 : if (!g_bdev_mgr.module_init_complete) {
2322 0 : bdev_mgr_unregister_cb(NULL);
2323 0 : return;
2324 : }
2325 :
2326 : /* Start iterating from the last touched module */
2327 65 : if (!g_resume_bdev_module) {
2328 65 : bdev_module = TAILQ_LAST(&g_bdev_mgr.bdev_modules, bdev_module_list);
2329 65 : } else {
2330 0 : bdev_module = TAILQ_PREV(g_resume_bdev_module, bdev_module_list,
2331 : internal.tailq);
2332 : }
2333 :
2334 254 : while (bdev_module) {
2335 189 : if (bdev_module->async_fini) {
2336 : /* Save our place so we can resume later. We must
2337 : * save the variable here, before calling module_fini()
2338 : * below, because in some cases the module may immediately
2339 : * call spdk_bdev_module_fini_done() and re-enter
2340 : * this function to continue iterating. */
2341 0 : g_resume_bdev_module = bdev_module;
2342 0 : }
2343 :
2344 189 : if (bdev_module->module_fini) {
2345 189 : bdev_module->module_fini();
2346 189 : }
2347 :
2348 189 : if (bdev_module->async_fini) {
2349 0 : return;
2350 : }
2351 :
2352 189 : bdev_module = TAILQ_PREV(bdev_module, bdev_module_list,
2353 : internal.tailq);
2354 : }
2355 :
2356 65 : g_resume_bdev_module = NULL;
2357 65 : spdk_io_device_unregister(&g_bdev_mgr, bdev_mgr_unregister_cb);
2358 65 : }
2359 :
2360 : void
2361 0 : spdk_bdev_module_fini_done(void)
2362 : {
2363 0 : if (spdk_get_thread() != g_fini_thread) {
2364 0 : spdk_thread_send_msg(g_fini_thread, bdev_module_fini_iter, NULL);
2365 0 : } else {
2366 0 : bdev_module_fini_iter(NULL);
2367 : }
2368 0 : }
2369 :
2370 : static void
2371 65 : bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno)
2372 : {
2373 65 : struct spdk_bdev *bdev = cb_arg;
2374 :
2375 65 : if (bdeverrno && bdev) {
2376 0 : SPDK_WARNLOG("Unable to unregister bdev '%s' during spdk_bdev_finish()\n",
2377 : bdev->name);
2378 :
2379 : /*
2380 : * Since the call to spdk_bdev_unregister() failed, we have no way to free this
2381 : * bdev; try to continue by manually removing this bdev from the list and continue
2382 : * with the next bdev in the list.
2383 : */
2384 0 : TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link);
2385 0 : }
2386 :
2387 65 : if (TAILQ_EMPTY(&g_bdev_mgr.bdevs)) {
2388 65 : SPDK_DEBUGLOG(bdev, "Done unregistering bdevs\n");
2389 : /*
 2390 :                  * Bdev module finish needs to be deferred, as we might be in the middle of some context
2391 : * (like bdev part free) that will use this bdev (or private bdev driver ctx data)
2392 : * after returning.
2393 : */
2394 65 : spdk_thread_send_msg(spdk_get_thread(), bdev_module_fini_iter, NULL);
2395 65 : return;
2396 : }
2397 :
2398 : /*
2399 : * Unregister last unclaimed bdev in the list, to ensure that bdev subsystem
2400 : * shutdown proceeds top-down. The goal is to give virtual bdevs an opportunity
2401 : * to detect clean shutdown as opposed to run-time hot removal of the underlying
2402 : * base bdevs.
2403 : *
2404 : * Also, walk the list in the reverse order.
2405 : */
2406 0 : for (bdev = TAILQ_LAST(&g_bdev_mgr.bdevs, spdk_bdev_list);
2407 0 : bdev; bdev = TAILQ_PREV(bdev, spdk_bdev_list, internal.link)) {
2408 0 : spdk_spin_lock(&bdev->internal.spinlock);
2409 0 : if (bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE) {
2410 0 : LOG_ALREADY_CLAIMED_DEBUG("claimed, skipping", bdev);
2411 0 : spdk_spin_unlock(&bdev->internal.spinlock);
2412 0 : continue;
2413 : }
2414 0 : spdk_spin_unlock(&bdev->internal.spinlock);
2415 :
2416 0 : SPDK_DEBUGLOG(bdev, "Unregistering bdev '%s'\n", bdev->name);
2417 0 : spdk_bdev_unregister(bdev, bdev_finish_unregister_bdevs_iter, bdev);
2418 0 : return;
2419 : }
2420 :
2421 : /*
2422 : * If any bdev fails to unclaim underlying bdev properly, we may face the
2423 : * case of bdev list consisting of claimed bdevs only (if claims are managed
2424 : * correctly, this would mean there's a loop in the claims graph which is
2425 : * clearly impossible). Warn and unregister last bdev on the list then.
2426 : */
2427 0 : for (bdev = TAILQ_LAST(&g_bdev_mgr.bdevs, spdk_bdev_list);
2428 0 : bdev; bdev = TAILQ_PREV(bdev, spdk_bdev_list, internal.link)) {
2429 0 : SPDK_WARNLOG("Unregistering claimed bdev '%s'!\n", bdev->name);
2430 0 : spdk_bdev_unregister(bdev, bdev_finish_unregister_bdevs_iter, bdev);
2431 0 : return;
2432 : }
2433 65 : }
2434 :
2435 : static void
2436 65 : bdev_module_fini_start_iter(void *arg)
2437 : {
2438 : struct spdk_bdev_module *bdev_module;
2439 :
2440 65 : if (!g_resume_bdev_module) {
2441 65 : bdev_module = TAILQ_LAST(&g_bdev_mgr.bdev_modules, bdev_module_list);
2442 65 : } else {
2443 0 : bdev_module = TAILQ_PREV(g_resume_bdev_module, bdev_module_list, internal.tailq);
2444 : }
2445 :
2446 254 : while (bdev_module) {
2447 189 : if (bdev_module->async_fini_start) {
2448 : /* Save our place so we can resume later. We must
2449 : * save the variable here, before calling fini_start()
2450 : * below, because in some cases the module may immediately
2451 : * call spdk_bdev_module_fini_start_done() and re-enter
2452 : * this function to continue iterating. */
2453 0 : g_resume_bdev_module = bdev_module;
2454 0 : }
2455 :
2456 189 : if (bdev_module->fini_start) {
2457 23 : bdev_module->fini_start();
2458 23 : }
2459 :
2460 189 : if (bdev_module->async_fini_start) {
2461 0 : return;
2462 : }
2463 :
2464 189 : bdev_module = TAILQ_PREV(bdev_module, bdev_module_list, internal.tailq);
2465 : }
2466 :
2467 65 : g_resume_bdev_module = NULL;
2468 :
2469 65 : bdev_finish_unregister_bdevs_iter(NULL, 0);
2470 65 : }
2471 :
2472 : void
2473 0 : spdk_bdev_module_fini_start_done(void)
2474 : {
2475 0 : if (spdk_get_thread() != g_fini_thread) {
2476 0 : spdk_thread_send_msg(g_fini_thread, bdev_module_fini_start_iter, NULL);
2477 0 : } else {
2478 0 : bdev_module_fini_start_iter(NULL);
2479 : }
2480 0 : }
2481 :
2482 : static void
2483 65 : bdev_finish_wait_for_examine_done(void *cb_arg)
2484 : {
2485 65 : bdev_module_fini_start_iter(NULL);
2486 65 : }
2487 :
2488 : static void bdev_open_async_fini(void);
2489 :
2490 : void
2491 65 : spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg)
2492 : {
2493 : int rc;
2494 :
2495 65 : assert(cb_fn != NULL);
2496 :
2497 65 : g_fini_thread = spdk_get_thread();
2498 :
2499 65 : g_fini_cb_fn = cb_fn;
2500 65 : g_fini_cb_arg = cb_arg;
2501 :
2502 65 : bdev_open_async_fini();
2503 :
2504 65 : rc = spdk_bdev_wait_for_examine(bdev_finish_wait_for_examine_done, NULL);
2505 65 : if (rc != 0) {
2506 0 : SPDK_ERRLOG("wait_for_examine failed: %s\n", spdk_strerror(-rc));
2507 0 : bdev_finish_wait_for_examine_done(NULL);
2508 0 : }
2509 65 : }
2510 :
2511 : struct spdk_bdev_io *
2512 697 : bdev_channel_get_io(struct spdk_bdev_channel *channel)
2513 : {
2514 697 : struct spdk_bdev_mgmt_channel *ch = channel->shared_resource->mgmt_ch;
2515 : struct spdk_bdev_io *bdev_io;
2516 :
2517 697 : if (ch->per_thread_cache_count > 0) {
2518 637 : bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
2519 637 : STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link);
2520 637 : ch->per_thread_cache_count--;
2521 697 : } else if (spdk_unlikely(!TAILQ_EMPTY(&ch->io_wait_queue))) {
2522 : /*
2523 : * Don't try to look for bdev_ios in the global pool if there are
2524 : * waiters on bdev_ios - we don't want this caller to jump the line.
2525 : */
2526 0 : bdev_io = NULL;
2527 0 : } else {
2528 60 : bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
2529 : }
2530 :
2531 697 : return bdev_io;
2532 : }
2533 :
2534 : void
2535 691 : spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
2536 : {
2537 : struct spdk_bdev_mgmt_channel *ch;
2538 :
2539 691 : assert(bdev_io != NULL);
2540 691 : assert(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_PENDING);
2541 :
2542 691 : ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
2543 :
2544 691 : if (bdev_io->internal.f.has_buf) {
2545 16 : bdev_io_put_buf(bdev_io);
2546 16 : }
2547 :
2548 691 : if (ch->per_thread_cache_count < ch->bdev_io_cache_size) {
2549 637 : ch->per_thread_cache_count++;
2550 637 : STAILQ_INSERT_HEAD(&ch->per_thread_cache, bdev_io, internal.buf_link);
2551 641 : while (ch->per_thread_cache_count > 0 && !TAILQ_EMPTY(&ch->io_wait_queue)) {
2552 : struct spdk_bdev_io_wait_entry *entry;
2553 :
2554 4 : entry = TAILQ_FIRST(&ch->io_wait_queue);
2555 4 : TAILQ_REMOVE(&ch->io_wait_queue, entry, link);
2556 4 : entry->cb_fn(entry->cb_arg);
2557 : }
2558 637 : } else {
2559 : /* We should never have a full cache with entries on the io wait queue. */
2560 54 : assert(TAILQ_EMPTY(&ch->io_wait_queue));
2561 54 : spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
2562 : }
2563 691 : }
2564 :
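     : /*
     :  * When both the per-thread cache and the global bdev_io pool are exhausted, submission
     :  * APIs fail with -ENOMEM and callers can park themselves on the io_wait_queue serviced
     :  * above. Illustrative sketch (req and retry_read are hypothetical; the wait entry is
     :  * embedded in the caller's own request context so it stays valid until cb_fn fires):
     :  *
     :  *     req->wait_entry.bdev = bdev;
     :  *     req->wait_entry.cb_fn = retry_read;
     :  *     req->wait_entry.cb_arg = req;
     :  *     spdk_bdev_queue_io_wait(bdev, io_ch, &req->wait_entry);
     :  */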
2565 : static bool
2566 72 : bdev_qos_is_iops_rate_limit(enum spdk_bdev_qos_rate_limit_type limit)
2567 : {
2568 72 : assert(limit != SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES);
2569 :
2570 72 : switch (limit) {
2571 : case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT:
2572 18 : return true;
2573 : case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT:
2574 : case SPDK_BDEV_QOS_R_BPS_RATE_LIMIT:
2575 : case SPDK_BDEV_QOS_W_BPS_RATE_LIMIT:
2576 54 : return false;
2577 0 : case SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES:
2578 : default:
2579 0 : return false;
2580 : }
2581 72 : }
2582 :
2583 : static bool
2584 25 : bdev_qos_io_to_limit(struct spdk_bdev_io *bdev_io)
2585 : {
2586 25 : switch (bdev_io->type) {
2587 : case SPDK_BDEV_IO_TYPE_NVME_IO:
2588 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
2589 : case SPDK_BDEV_IO_TYPE_READ:
2590 : case SPDK_BDEV_IO_TYPE_WRITE:
2591 23 : return true;
2592 : case SPDK_BDEV_IO_TYPE_ZCOPY:
2593 0 : if (bdev_io->u.bdev.zcopy.start) {
2594 0 : return true;
2595 : } else {
2596 0 : return false;
2597 : }
2598 : default:
2599 2 : return false;
2600 : }
2601 25 : }
2602 :
2603 : static bool
2604 33 : bdev_is_read_io(struct spdk_bdev_io *bdev_io)
2605 : {
2606 33 : switch (bdev_io->type) {
2607 : case SPDK_BDEV_IO_TYPE_NVME_IO:
2608 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
2609 : /* Bit 1 (0x2) set for read operation */
2610 0 : if (bdev_io->u.nvme_passthru.cmd.opc & SPDK_NVME_OPC_READ) {
2611 0 : return true;
2612 : } else {
2613 0 : return false;
2614 : }
2615 : case SPDK_BDEV_IO_TYPE_READ:
2616 30 : return true;
2617 : case SPDK_BDEV_IO_TYPE_ZCOPY:
2618 : /* Populate to read from disk */
2619 0 : if (bdev_io->u.bdev.zcopy.populate) {
2620 0 : return true;
2621 : } else {
2622 0 : return false;
2623 : }
2624 : default:
2625 3 : return false;
2626 : }
2627 33 : }
2628 :
2629 : static uint64_t
2630 43 : bdev_get_io_size_in_byte(struct spdk_bdev_io *bdev_io)
2631 : {
2632 43 : struct spdk_bdev *bdev = bdev_io->bdev;
2633 :
2634 43 : switch (bdev_io->type) {
2635 : case SPDK_BDEV_IO_TYPE_NVME_IO:
2636 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
2637 0 : return bdev_io->u.nvme_passthru.nbytes;
2638 : case SPDK_BDEV_IO_TYPE_READ:
2639 : case SPDK_BDEV_IO_TYPE_WRITE:
2640 43 : return bdev_io->u.bdev.num_blocks * bdev->blocklen;
2641 : case SPDK_BDEV_IO_TYPE_ZCOPY:
2642 : /* Track the data in the start phase only */
2643 0 : if (bdev_io->u.bdev.zcopy.start) {
2644 0 : return bdev_io->u.bdev.num_blocks * bdev->blocklen;
2645 : } else {
2646 0 : return 0;
2647 : }
2648 : default:
2649 0 : return 0;
2650 : }
2651 43 : }
2652 :
2653 : static inline bool
2654 64 : bdev_qos_rw_queue_io(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io, uint64_t delta)
2655 : {
2656 : int64_t remaining_this_timeslice;
2657 :
2658 64 : if (!limit->max_per_timeslice) {
2659 : /* The QoS is disabled */
2660 0 : return false;
2661 : }
2662 :
2663 64 : remaining_this_timeslice = __atomic_sub_fetch(&limit->remaining_this_timeslice, delta,
2664 : __ATOMIC_RELAXED);
2665 64 : if (remaining_this_timeslice + (int64_t)delta > 0) {
2666 : /* There was still a quota for this delta -> the IO shouldn't be queued
2667 : *
2668 : * We allow a slight quota overrun here so an IO bigger than the per-timeslice
 2669 :                  * quota can be allowed once in a while. Such an overrun is then taken into account in
2670 : * the QoS poller, where the next timeslice quota is calculated.
2671 : */
2672 59 : return false;
2673 : }
2674 :
2675 : /* There was no quota for this delta -> the IO should be queued
 2676 :          * The remaining_this_timeslice must be rewound so it reflects the real
2677 : * amount of IOs or bytes allowed.
2678 : */
2679 5 : __atomic_add_fetch(
2680 5 : &limit->remaining_this_timeslice, delta, __ATOMIC_RELAXED);
2681 5 : return true;
2682 64 : }
2683 :
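     : /*
     :  * Worked example (illustrative): with a byte-rate limit whose remaining quota for this
     :  * timeslice is 512 bytes, a 4096-byte I/O atomically computes
     :  * remaining_this_timeslice = 512 - 4096 = -3584; since -3584 + 4096 > 0 the I/O is
     :  * still allowed (slight overrun) and the deficit is taken into account when the next
     :  * timeslice is refilled. A second 4096-byte I/O in the same timeslice computes
     :  * -3584 - 4096 = -7680, sees -7680 + 4096 <= 0, has its delta added back and is queued.
     :  */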
2684 : static inline void
2685 5 : bdev_qos_rw_rewind_io(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io, uint64_t delta)
2686 : {
2687 5 : __atomic_add_fetch(&limit->remaining_this_timeslice, delta, __ATOMIC_RELAXED);
2688 5 : }
2689 :
2690 : static bool
2691 23 : bdev_qos_rw_iops_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2692 : {
2693 23 : return bdev_qos_rw_queue_io(limit, io, 1);
2694 : }
2695 :
2696 : static void
2697 3 : bdev_qos_rw_iops_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2698 : {
2699 3 : bdev_qos_rw_rewind_io(limit, io, 1);
2700 3 : }
2701 :
2702 : static bool
2703 41 : bdev_qos_rw_bps_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2704 : {
2705 41 : return bdev_qos_rw_queue_io(limit, io, bdev_get_io_size_in_byte(io));
2706 : }
2707 :
2708 : static void
2709 2 : bdev_qos_rw_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2710 : {
2711 2 : bdev_qos_rw_rewind_io(limit, io, bdev_get_io_size_in_byte(io));
2712 2 : }
2713 :
2714 : static bool
2715 19 : bdev_qos_r_bps_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2716 : {
2717 19 : if (bdev_is_read_io(io) == false) {
2718 1 : return false;
2719 : }
2720 :
2721 18 : return bdev_qos_rw_bps_queue(limit, io);
2722 19 : }
2723 :
2724 : static void
2725 0 : bdev_qos_r_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2726 : {
2727 0 : if (bdev_is_read_io(io) != false) {
2728 0 : bdev_qos_rw_rewind_io(limit, io, bdev_get_io_size_in_byte(io));
2729 0 : }
2730 0 : }
2731 :
2732 : static bool
2733 14 : bdev_qos_w_bps_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2734 : {
2735 14 : if (bdev_is_read_io(io) == true) {
2736 12 : return false;
2737 : }
2738 :
2739 2 : return bdev_qos_rw_bps_queue(limit, io);
2740 14 : }
2741 :
2742 : static void
2743 0 : bdev_qos_w_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2744 : {
2745 0 : if (bdev_is_read_io(io) != true) {
2746 0 : bdev_qos_rw_rewind_io(limit, io, bdev_get_io_size_in_byte(io));
2747 0 : }
2748 0 : }
2749 :
2750 : static void
2751 10 : bdev_qos_set_ops(struct spdk_bdev_qos *qos)
2752 : {
2753 : int i;
2754 :
2755 50 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
2756 40 : if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
2757 15 : qos->rate_limits[i].queue_io = NULL;
2758 15 : continue;
2759 : }
2760 :
2761 25 : switch (i) {
2762 : case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT:
2763 9 : qos->rate_limits[i].queue_io = bdev_qos_rw_iops_queue;
2764 9 : qos->rate_limits[i].rewind_quota = bdev_qos_rw_iops_rewind_quota;
2765 9 : break;
2766 : case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT:
2767 7 : qos->rate_limits[i].queue_io = bdev_qos_rw_bps_queue;
2768 7 : qos->rate_limits[i].rewind_quota = bdev_qos_rw_bps_rewind_quota;
2769 7 : break;
2770 : case SPDK_BDEV_QOS_R_BPS_RATE_LIMIT:
2771 5 : qos->rate_limits[i].queue_io = bdev_qos_r_bps_queue;
2772 5 : qos->rate_limits[i].rewind_quota = bdev_qos_r_bps_rewind_quota;
2773 5 : break;
2774 : case SPDK_BDEV_QOS_W_BPS_RATE_LIMIT:
2775 4 : qos->rate_limits[i].queue_io = bdev_qos_w_bps_queue;
2776 4 : qos->rate_limits[i].rewind_quota = bdev_qos_w_bps_rewind_quota;
2777 4 : break;
2778 : default:
2779 0 : break;
2780 : }
2781 25 : }
2782 10 : }
2783 :
2784 : static void
2785 6 : _bdev_io_complete_in_submit(struct spdk_bdev_channel *bdev_ch,
2786 : struct spdk_bdev_io *bdev_io,
2787 : enum spdk_bdev_io_status status)
2788 : {
2789 6 : bdev_io->internal.f.in_submit_request = true;
2790 6 : bdev_io_increment_outstanding(bdev_ch, bdev_ch->shared_resource);
2791 6 : spdk_bdev_io_complete(bdev_io, status);
2792 6 : bdev_io->internal.f.in_submit_request = false;
2793 6 : }
2794 :
2795 : static inline void
2796 574 : bdev_io_do_submit(struct spdk_bdev_channel *bdev_ch, struct spdk_bdev_io *bdev_io)
2797 : {
2798 574 : struct spdk_bdev *bdev = bdev_io->bdev;
2799 574 : struct spdk_io_channel *ch = bdev_ch->channel;
2800 574 : struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
2801 :
2802 574 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT)) {
2803 16 : struct spdk_bdev_mgmt_channel *mgmt_channel = shared_resource->mgmt_ch;
2804 16 : struct spdk_bdev_io *bio_to_abort = bdev_io->u.abort.bio_to_abort;
2805 :
2806 16 : if (bdev_abort_queued_io(&shared_resource->nomem_io, bio_to_abort) ||
2807 16 : bdev_abort_buf_io(mgmt_channel, bio_to_abort)) {
2808 0 : _bdev_io_complete_in_submit(bdev_ch, bdev_io,
2809 : SPDK_BDEV_IO_STATUS_SUCCESS);
2810 0 : return;
2811 : }
2812 16 : }
2813 :
2814 574 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE &&
2815 : bdev_io->bdev->split_on_write_unit &&
2816 : bdev_io->u.bdev.num_blocks < bdev_io->bdev->write_unit_size)) {
2817 4 : SPDK_ERRLOG("IO num_blocks %lu does not match the write_unit_size %u\n",
2818 : bdev_io->u.bdev.num_blocks, bdev_io->bdev->write_unit_size);
2819 4 : _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
2820 4 : return;
2821 : }
2822 :
2823 570 : if (spdk_likely(TAILQ_EMPTY(&shared_resource->nomem_io))) {
2824 527 : bdev_io_increment_outstanding(bdev_ch, shared_resource);
2825 527 : bdev_io->internal.f.in_submit_request = true;
2826 527 : bdev_submit_request(bdev, ch, bdev_io);
2827 527 : bdev_io->internal.f.in_submit_request = false;
2828 527 : } else {
2829 43 : bdev_queue_nomem_io_tail(shared_resource, bdev_io, BDEV_IO_RETRY_STATE_SUBMIT);
2830 43 : if (shared_resource->nomem_threshold == 0 && shared_resource->io_outstanding == 0) {
 2831 :                         /* Special case: there are queued nomem I/Os but no outstanding I/Os whose
 2832 :                          * completions could trigger a retry of the queued I/Os, so retry now. */
2833 0 : bdev_shared_ch_retry_io(shared_resource);
2834 0 : }
2835 : }
2836 574 : }
2837 :
2838 : static bool
2839 25 : bdev_qos_queue_io(struct spdk_bdev_qos *qos, struct spdk_bdev_io *bdev_io)
2840 : {
2841 : int i;
2842 :
2843 25 : if (bdev_qos_io_to_limit(bdev_io) == true) {
2844 100 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
2845 82 : if (!qos->rate_limits[i].queue_io) {
2846 5 : continue;
2847 : }
2848 :
2849 231 : if (qos->rate_limits[i].queue_io(&qos->rate_limits[i],
2850 154 : bdev_io) == true) {
2851 10 : for (i -= 1; i >= 0 ; i--) {
2852 5 : if (!qos->rate_limits[i].queue_io) {
2853 0 : continue;
2854 : }
2855 :
2856 5 : qos->rate_limits[i].rewind_quota(&qos->rate_limits[i], bdev_io);
2857 5 : }
2858 5 : return true;
2859 : }
2860 72 : }
2861 18 : }
2862 :
2863 20 : return false;
2864 25 : }
2865 :
2866 : static int
2867 27 : bdev_qos_io_submit(struct spdk_bdev_channel *ch, struct spdk_bdev_qos *qos)
2868 : {
2869 27 : struct spdk_bdev_io *bdev_io = NULL, *tmp = NULL;
2870 27 : int submitted_ios = 0;
2871 :
2872 52 : TAILQ_FOREACH_SAFE(bdev_io, &ch->qos_queued_io, internal.link, tmp) {
2873 25 : if (!bdev_qos_queue_io(qos, bdev_io)) {
2874 20 : TAILQ_REMOVE(&ch->qos_queued_io, bdev_io, internal.link);
2875 20 : bdev_io_do_submit(ch, bdev_io);
2876 :
2877 20 : submitted_ios++;
2878 20 : }
2879 25 : }
2880 :
2881 27 : return submitted_ios;
2882 : }
2883 :
2884 : static void
2885 2 : bdev_queue_io_wait_with_cb(struct spdk_bdev_io *bdev_io, spdk_bdev_io_wait_cb cb_fn)
2886 : {
2887 : int rc;
2888 :
2889 2 : bdev_io->internal.waitq_entry.bdev = bdev_io->bdev;
2890 2 : bdev_io->internal.waitq_entry.cb_fn = cb_fn;
2891 2 : bdev_io->internal.waitq_entry.cb_arg = bdev_io;
2892 4 : rc = spdk_bdev_queue_io_wait(bdev_io->bdev, spdk_io_channel_from_ctx(bdev_io->internal.ch),
2893 2 : &bdev_io->internal.waitq_entry);
2894 2 : if (rc != 0) {
2895 0 : SPDK_ERRLOG("Queue IO failed, rc=%d\n", rc);
2896 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
2897 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
2898 0 : }
2899 2 : }
2900 :
2901 : static bool
2902 621 : bdev_rw_should_split(struct spdk_bdev_io *bdev_io)
2903 : {
2904 : uint32_t io_boundary;
2905 621 : struct spdk_bdev *bdev = bdev_io->bdev;
2906 621 : uint32_t max_segment_size = bdev->max_segment_size;
2907 621 : uint32_t max_size = bdev->max_rw_size;
2908 621 : int max_segs = bdev->max_num_segments;
2909 :
2910 621 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE && bdev->split_on_write_unit) {
2911 24 : io_boundary = bdev->write_unit_size;
2912 621 : } else if (bdev->split_on_optimal_io_boundary) {
2913 168 : io_boundary = bdev->optimal_io_boundary;
2914 168 : } else {
2915 429 : io_boundary = 0;
2916 : }
2917 :
2918 621 : if (spdk_likely(!io_boundary && !max_segs && !max_segment_size && !max_size)) {
2919 243 : return false;
2920 : }
2921 :
2922 378 : if (io_boundary) {
2923 : uint64_t start_stripe, end_stripe;
2924 :
2925 192 : start_stripe = bdev_io->u.bdev.offset_blocks;
2926 192 : end_stripe = start_stripe + bdev_io->u.bdev.num_blocks - 1;
2927 : /* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */
2928 192 : if (spdk_likely(spdk_u32_is_pow2(io_boundary))) {
2929 192 : start_stripe >>= spdk_u32log2(io_boundary);
2930 192 : end_stripe >>= spdk_u32log2(io_boundary);
2931 192 : } else {
2932 0 : start_stripe /= io_boundary;
2933 0 : end_stripe /= io_boundary;
2934 : }
2935 :
2936 192 : if (start_stripe != end_stripe) {
2937 75 : return true;
2938 : }
2939 117 : }
2940 :
2941 303 : if (max_segs) {
2942 150 : if (bdev_io->u.bdev.iovcnt > max_segs) {
2943 15 : return true;
2944 : }
2945 135 : }
2946 :
2947 288 : if (max_segment_size) {
2948 470 : for (int i = 0; i < bdev_io->u.bdev.iovcnt; i++) {
2949 346 : if (bdev_io->u.bdev.iovs[i].iov_len > max_segment_size) {
2950 12 : return true;
2951 : }
2952 334 : }
2953 124 : }
2954 :
2955 276 : if (max_size) {
2956 52 : if (bdev_io->u.bdev.num_blocks > max_size) {
2957 7 : return true;
2958 : }
2959 45 : }
2960 :
2961 269 : return false;
2962 621 : }
2963 :
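     : /*
     :  * Example (illustrative): with split_on_optimal_io_boundary set and
     :  * optimal_io_boundary = 8 blocks (a power of two), a read at offset_blocks = 6 with
     :  * num_blocks = 4 spans blocks 6..9: start_stripe = 6 >> 3 = 0 and end_stripe = 9 >> 3 = 1,
     :  * so the I/O crosses a boundary and is split into children of 2 and 2 blocks. The same
     :  * request at offset_blocks = 8 stays within a single stripe and is not split.
     :  */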
2964 : static bool
2965 24 : bdev_unmap_should_split(struct spdk_bdev_io *bdev_io)
2966 : {
2967 : uint32_t num_unmap_segments;
2968 :
2969 24 : if (!bdev_io->bdev->max_unmap || !bdev_io->bdev->max_unmap_segments) {
2970 3 : return false;
2971 : }
2972 21 : num_unmap_segments = spdk_divide_round_up(bdev_io->u.bdev.num_blocks, bdev_io->bdev->max_unmap);
2973 21 : if (num_unmap_segments > bdev_io->bdev->max_unmap_segments) {
2974 4 : return true;
2975 : }
2976 :
2977 17 : return false;
2978 24 : }
2979 :
2980 : static bool
2981 37 : bdev_write_zeroes_should_split(struct spdk_bdev_io *bdev_io)
2982 : {
2983 37 : if (!bdev_io->bdev->max_write_zeroes) {
2984 4 : return false;
2985 : }
2986 :
2987 33 : if (bdev_io->u.bdev.num_blocks > bdev_io->bdev->max_write_zeroes) {
2988 10 : return true;
2989 : }
2990 :
2991 23 : return false;
2992 37 : }
2993 :
2994 : static bool
2995 30 : bdev_copy_should_split(struct spdk_bdev_io *bdev_io)
2996 : {
2997 30 : if (bdev_io->bdev->max_copy != 0 &&
2998 25 : bdev_io->u.bdev.num_blocks > bdev_io->bdev->max_copy) {
2999 6 : return true;
3000 : }
3001 :
3002 24 : return false;
3003 30 : }
3004 :
3005 : static bool
3006 792 : bdev_io_should_split(struct spdk_bdev_io *bdev_io)
3007 : {
3008 792 : switch (bdev_io->type) {
3009 : case SPDK_BDEV_IO_TYPE_READ:
3010 : case SPDK_BDEV_IO_TYPE_WRITE:
3011 621 : return bdev_rw_should_split(bdev_io);
3012 : case SPDK_BDEV_IO_TYPE_UNMAP:
3013 24 : return bdev_unmap_should_split(bdev_io);
3014 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3015 37 : return bdev_write_zeroes_should_split(bdev_io);
3016 : case SPDK_BDEV_IO_TYPE_COPY:
3017 30 : return bdev_copy_should_split(bdev_io);
3018 : default:
3019 80 : return false;
3020 : }
3021 792 : }
3022 :
3023 : static uint32_t
3024 249 : _to_next_boundary(uint64_t offset, uint32_t boundary)
3025 : {
3026 249 : return (boundary - (offset % boundary));
3027 : }
3028 :
3029 : static void bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
3030 :
3031 : static void _bdev_rw_split(void *_bdev_io);
3032 :
3033 : static void bdev_unmap_split(struct spdk_bdev_io *bdev_io);
3034 :
3035 : static void
3036 0 : _bdev_unmap_split(void *_bdev_io)
3037 : {
3038 0 : return bdev_unmap_split((struct spdk_bdev_io *)_bdev_io);
3039 : }
3040 :
3041 : static void bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io);
3042 :
3043 : static void
3044 0 : _bdev_write_zeroes_split(void *_bdev_io)
3045 : {
3046 0 : return bdev_write_zeroes_split((struct spdk_bdev_io *)_bdev_io);
3047 : }
3048 :
3049 : static void bdev_copy_split(struct spdk_bdev_io *bdev_io);
3050 :
3051 : static void
3052 0 : _bdev_copy_split(void *_bdev_io)
3053 : {
3054 0 : return bdev_copy_split((struct spdk_bdev_io *)_bdev_io);
3055 : }
3056 :
3057 : static int
3058 305 : bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt, void *md_buf,
3059 : uint64_t num_blocks, uint64_t *offset, uint64_t *remaining)
3060 : {
3061 : int rc;
3062 : uint64_t current_offset, current_remaining, current_src_offset;
3063 : spdk_bdev_io_wait_cb io_wait_fn;
3064 :
3065 305 : current_offset = *offset;
3066 305 : current_remaining = *remaining;
3067 :
3068 305 : assert(bdev_io->internal.f.split);
3069 :
3070 305 : bdev_io->internal.split.outstanding++;
3071 :
3072 305 : io_wait_fn = _bdev_rw_split;
3073 305 : switch (bdev_io->type) {
3074 : case SPDK_BDEV_IO_TYPE_READ:
3075 196 : assert(bdev_io->u.bdev.accel_sequence == NULL);
3076 392 : rc = bdev_readv_blocks_with_md(bdev_io->internal.desc,
3077 196 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3078 196 : iov, iovcnt, md_buf, current_offset,
3079 196 : num_blocks,
3080 196 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
3081 196 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
3082 : NULL,
3083 196 : bdev_io->u.bdev.dif_check_flags,
3084 196 : bdev_io_split_done, bdev_io);
3085 196 : break;
3086 : case SPDK_BDEV_IO_TYPE_WRITE:
3087 50 : assert(bdev_io->u.bdev.accel_sequence == NULL);
3088 100 : rc = bdev_writev_blocks_with_md(bdev_io->internal.desc,
3089 50 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3090 50 : iov, iovcnt, md_buf, current_offset,
3091 50 : num_blocks,
3092 50 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
3093 50 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
3094 : NULL,
3095 50 : bdev_io->u.bdev.dif_check_flags,
3096 50 : bdev_io->u.bdev.nvme_cdw12.raw,
3097 50 : bdev_io->u.bdev.nvme_cdw13.raw,
3098 50 : bdev_io_split_done, bdev_io);
3099 50 : break;
3100 : case SPDK_BDEV_IO_TYPE_UNMAP:
3101 17 : io_wait_fn = _bdev_unmap_split;
3102 34 : rc = spdk_bdev_unmap_blocks(bdev_io->internal.desc,
3103 17 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3104 17 : current_offset, num_blocks,
3105 17 : bdev_io_split_done, bdev_io);
3106 17 : break;
3107 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3108 23 : io_wait_fn = _bdev_write_zeroes_split;
3109 46 : rc = spdk_bdev_write_zeroes_blocks(bdev_io->internal.desc,
3110 23 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3111 23 : current_offset, num_blocks,
3112 23 : bdev_io_split_done, bdev_io);
3113 23 : break;
3114 : case SPDK_BDEV_IO_TYPE_COPY:
3115 19 : io_wait_fn = _bdev_copy_split;
3116 38 : current_src_offset = bdev_io->u.bdev.copy.src_offset_blocks +
3117 19 : (current_offset - bdev_io->u.bdev.offset_blocks);
3118 38 : rc = spdk_bdev_copy_blocks(bdev_io->internal.desc,
3119 19 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3120 19 : current_offset, current_src_offset, num_blocks,
3121 19 : bdev_io_split_done, bdev_io);
3122 19 : break;
3123 : default:
3124 0 : assert(false);
3125 : rc = -EINVAL;
3126 : break;
3127 : }
3128 :
3129 305 : if (rc == 0) {
3130 301 : current_offset += num_blocks;
3131 301 : current_remaining -= num_blocks;
3132 301 : bdev_io->internal.split.current_offset_blocks = current_offset;
3133 301 : bdev_io->internal.split.remaining_num_blocks = current_remaining;
3134 301 : *offset = current_offset;
3135 301 : *remaining = current_remaining;
3136 301 : } else {
3137 4 : bdev_io->internal.split.outstanding--;
3138 4 : if (rc == -ENOMEM) {
3139 4 : if (bdev_io->internal.split.outstanding == 0) {
3140 : /* No I/O is outstanding. Hence we should wait here. */
3141 1 : bdev_queue_io_wait_with_cb(bdev_io, io_wait_fn);
3142 1 : }
3143 4 : } else {
3144 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
3145 0 : if (bdev_io->internal.split.outstanding == 0) {
3146 0 : bdev_ch_remove_from_io_submitted(bdev_io);
3147 0 : spdk_trace_record(TRACE_BDEV_IO_DONE, bdev_io->internal.ch->trace_id,
3148 : 0, (uintptr_t)bdev_io, bdev_io->internal.caller_ctx,
3149 : bdev_io->internal.ch->queue_depth);
3150 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
3151 0 : }
3152 : }
3153 : }
3154 :
3155 305 : return rc;
3156 : }
3157 :
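     : /*
     :  * Error handling in bdev_io_split_submit() above: on -ENOMEM with no children
     :  * outstanding, the parent I/O waits on the bdev_io wait queue and the split round is
     :  * restarted from io_wait_fn; if children are still outstanding, the remaining blocks
     :  * are retried once one of them completes. Any other error marks the parent I/O failed,
     :  * completing it immediately if nothing is outstanding.
     :  */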
3158 : static void
3159 67 : _bdev_rw_split(void *_bdev_io)
3160 : {
3161 : struct iovec *parent_iov, *iov;
3162 67 : struct spdk_bdev_io *bdev_io = _bdev_io;
3163 67 : struct spdk_bdev *bdev = bdev_io->bdev;
3164 : uint64_t parent_offset, current_offset, remaining;
3165 : uint32_t parent_iov_offset, parent_iovcnt, parent_iovpos, child_iovcnt;
3166 : uint32_t to_next_boundary, to_next_boundary_bytes, to_last_block_bytes;
3167 : uint32_t iovcnt, iov_len, child_iovsize;
3168 67 : uint32_t blocklen = bdev->blocklen;
3169 : uint32_t io_boundary;
3170 67 : uint32_t max_segment_size = bdev->max_segment_size;
3171 67 : uint32_t max_child_iovcnt = bdev->max_num_segments;
3172 67 : uint32_t max_size = bdev->max_rw_size;
3173 67 : void *md_buf = NULL;
3174 : int rc;
3175 :
3176 67 : max_size = max_size ? max_size : UINT32_MAX;
3177 67 : max_segment_size = max_segment_size ? max_segment_size : UINT32_MAX;
3178 67 : max_child_iovcnt = max_child_iovcnt ? spdk_min(max_child_iovcnt, SPDK_BDEV_IO_NUM_CHILD_IOV) :
3179 : SPDK_BDEV_IO_NUM_CHILD_IOV;
3180 :
3181 67 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE && bdev->split_on_write_unit) {
3182 5 : io_boundary = bdev->write_unit_size;
3183 67 : } else if (bdev->split_on_optimal_io_boundary) {
3184 40 : io_boundary = bdev->optimal_io_boundary;
3185 40 : } else {
3186 22 : io_boundary = UINT32_MAX;
3187 : }
3188 :
3189 67 : assert(bdev_io->internal.f.split);
3190 :
3191 67 : remaining = bdev_io->internal.split.remaining_num_blocks;
3192 67 : current_offset = bdev_io->internal.split.current_offset_blocks;
3193 67 : parent_offset = bdev_io->u.bdev.offset_blocks;
3194 67 : parent_iov_offset = (current_offset - parent_offset) * blocklen;
3195 67 : parent_iovcnt = bdev_io->u.bdev.iovcnt;
3196 :
3197 420 : for (parent_iovpos = 0; parent_iovpos < parent_iovcnt; parent_iovpos++) {
3198 420 : parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
3199 420 : if (parent_iov_offset < parent_iov->iov_len) {
3200 67 : break;
3201 : }
3202 353 : parent_iov_offset -= parent_iov->iov_len;
3203 353 : }
3204 :
3205 67 : child_iovcnt = 0;
3206 573 : while (remaining > 0 && parent_iovpos < parent_iovcnt &&
3207 264 : child_iovcnt < SPDK_BDEV_IO_NUM_CHILD_IOV) {
3208 249 : to_next_boundary = _to_next_boundary(current_offset, io_boundary);
3209 249 : to_next_boundary = spdk_min(remaining, to_next_boundary);
3210 249 : to_next_boundary = spdk_min(max_size, to_next_boundary);
3211 249 : to_next_boundary_bytes = to_next_boundary * blocklen;
3212 :
3213 249 : iov = &bdev_io->child_iov[child_iovcnt];
3214 249 : iovcnt = 0;
3215 :
3216 249 : if (bdev_io->u.bdev.md_buf) {
3217 48 : md_buf = (char *)bdev_io->u.bdev.md_buf +
3218 24 : (current_offset - parent_offset) * spdk_bdev_get_md_size(bdev);
3219 24 : }
3220 :
3221 249 : child_iovsize = spdk_min(SPDK_BDEV_IO_NUM_CHILD_IOV - child_iovcnt, max_child_iovcnt);
3222 1810 : while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt &&
3223 836 : iovcnt < child_iovsize) {
3224 725 : parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
3225 725 : iov_len = parent_iov->iov_len - parent_iov_offset;
3226 :
3227 725 : iov_len = spdk_min(iov_len, max_segment_size);
3228 725 : iov_len = spdk_min(iov_len, to_next_boundary_bytes);
3229 725 : to_next_boundary_bytes -= iov_len;
3230 :
3231 725 : bdev_io->child_iov[child_iovcnt].iov_base = parent_iov->iov_base + parent_iov_offset;
3232 725 : bdev_io->child_iov[child_iovcnt].iov_len = iov_len;
3233 :
3234 725 : if (iov_len < parent_iov->iov_len - parent_iov_offset) {
3235 183 : parent_iov_offset += iov_len;
3236 183 : } else {
3237 542 : parent_iovpos++;
3238 542 : parent_iov_offset = 0;
3239 : }
3240 725 : child_iovcnt++;
3241 725 : iovcnt++;
3242 : }
3243 :
3244 249 : if (to_next_boundary_bytes > 0) {
3245 : /* We had to stop this child I/O early because we ran out of
3246 : * child_iov space or were limited by max_num_segments.
3247 : * Ensure the iovs are aligned with the block size and
3248 : * then adjust to_next_boundary before starting the
3249 : * child I/O.
3250 : */
3251 111 : assert(child_iovcnt == SPDK_BDEV_IO_NUM_CHILD_IOV ||
3252 : iovcnt == child_iovsize);
3253 111 : to_last_block_bytes = to_next_boundary_bytes % blocklen;
3254 111 : if (to_last_block_bytes != 0) {
3255 24 : uint32_t child_iovpos = child_iovcnt - 1;
3256 : /* don't decrease child_iovcnt when it equals SPDK_BDEV_IO_NUM_CHILD_IOV
3257 : * so the loop will naturally end
3258 : */
3259 :
3260 24 : to_last_block_bytes = blocklen - to_last_block_bytes;
3261 24 : to_next_boundary_bytes += to_last_block_bytes;
3262 53 : while (to_last_block_bytes > 0 && iovcnt > 0) {
3263 32 : iov_len = spdk_min(to_last_block_bytes,
3264 : bdev_io->child_iov[child_iovpos].iov_len);
3265 32 : bdev_io->child_iov[child_iovpos].iov_len -= iov_len;
3266 32 : if (bdev_io->child_iov[child_iovpos].iov_len == 0) {
3267 15 : child_iovpos--;
3268 15 : if (--iovcnt == 0) {
3269 : /* If the child IO is less than a block size, just return.
3270 : * If the first child IO of any split round is less than
3271 : * a block size, exit with an error.
3272 : */
3273 3 : if (bdev_io->internal.split.outstanding == 0) {
3274 1 : SPDK_ERRLOG("The first child io was less than a block size\n");
3275 1 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
3276 1 : bdev_ch_remove_from_io_submitted(bdev_io);
3277 1 : spdk_trace_record(TRACE_BDEV_IO_DONE, bdev_io->internal.ch->trace_id,
3278 : 0, (uintptr_t)bdev_io, bdev_io->internal.caller_ctx,
3279 : bdev_io->internal.ch->queue_depth);
3280 1 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
3281 1 : }
3282 :
3283 3 : return;
3284 : }
3285 12 : }
3286 :
3287 29 : to_last_block_bytes -= iov_len;
3288 :
3289 29 : if (parent_iov_offset == 0) {
3290 14 : parent_iovpos--;
3291 14 : parent_iov_offset = bdev_io->u.bdev.iovs[parent_iovpos].iov_len;
3292 14 : }
3293 29 : parent_iov_offset -= iov_len;
3294 : }
3295 :
3296 21 : assert(to_last_block_bytes == 0);
3297 21 : }
3298 108 : to_next_boundary -= to_next_boundary_bytes / blocklen;
3299 108 : }
3300 :
3301 246 : rc = bdev_io_split_submit(bdev_io, iov, iovcnt, md_buf, to_next_boundary,
3302 : &current_offset, &remaining);
3303 246 : if (spdk_unlikely(rc)) {
3304 4 : return;
3305 : }
3306 : }
3307 67 : }
3308 :
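: /* The unmap, write zeroes and copy split helpers below issue at most a fixed number
: * of child requests per invocation (SPDK_BDEV_MAX_CHILDREN_*_REQS). Any remaining
: * blocks are split further from bdev_io_split_done() as those children complete.
: */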
3309 : static void
3310 3 : bdev_unmap_split(struct spdk_bdev_io *bdev_io)
3311 : {
3312 : uint64_t offset, unmap_blocks, remaining, max_unmap_blocks;
3313 3 : uint32_t num_children_reqs = 0;
3314 : int rc;
3315 :
3316 3 : assert(bdev_io->internal.f.split);
3317 :
3318 3 : offset = bdev_io->internal.split.current_offset_blocks;
3319 3 : remaining = bdev_io->internal.split.remaining_num_blocks;
3320 3 : max_unmap_blocks = bdev_io->bdev->max_unmap * bdev_io->bdev->max_unmap_segments;
3321 :
3322 20 : while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS)) {
3323 17 : unmap_blocks = spdk_min(remaining, max_unmap_blocks);
3324 :
3325 17 : rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, unmap_blocks,
3326 : &offset, &remaining);
3327 17 : if (spdk_likely(rc == 0)) {
3328 17 : num_children_reqs++;
3329 17 : } else {
3330 0 : return;
3331 : }
3332 : }
3333 3 : }
3334 :
3335 : static void
3336 6 : bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io)
3337 : {
3338 : uint64_t offset, write_zeroes_blocks, remaining;
3339 6 : uint32_t num_children_reqs = 0;
3340 : int rc;
3341 :
3342 6 : assert(bdev_io->internal.f.split);
3343 :
3344 6 : offset = bdev_io->internal.split.current_offset_blocks;
3345 6 : remaining = bdev_io->internal.split.remaining_num_blocks;
3346 :
3347 29 : while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS)) {
3348 23 : write_zeroes_blocks = spdk_min(remaining, bdev_io->bdev->max_write_zeroes);
3349 :
3350 23 : rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, write_zeroes_blocks,
3351 : &offset, &remaining);
3352 23 : if (spdk_likely(rc == 0)) {
3353 23 : num_children_reqs++;
3354 23 : } else {
3355 0 : return;
3356 : }
3357 : }
3358 6 : }
3359 :
3360 : static void
3361 4 : bdev_copy_split(struct spdk_bdev_io *bdev_io)
3362 : {
3363 : uint64_t offset, copy_blocks, remaining;
3364 4 : uint32_t num_children_reqs = 0;
3365 : int rc;
3366 :
3367 4 : assert(bdev_io->internal.f.split);
3368 :
3369 4 : offset = bdev_io->internal.split.current_offset_blocks;
3370 4 : remaining = bdev_io->internal.split.remaining_num_blocks;
3371 :
3372 4 : assert(bdev_io->bdev->max_copy != 0);
3373 23 : while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_COPY_REQS)) {
3374 19 : copy_blocks = spdk_min(remaining, bdev_io->bdev->max_copy);
3375 :
3376 19 : rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, copy_blocks,
3377 : &offset, &remaining);
3378 19 : if (spdk_likely(rc == 0)) {
3379 19 : num_children_reqs++;
3380 19 : } else {
3381 0 : return;
3382 : }
3383 : }
3384 4 : }
3385 :
3386 : static void
3387 58 : parent_bdev_io_complete(void *ctx, int rc)
3388 : {
3389 58 : struct spdk_bdev_io *parent_io = ctx;
3390 :
3391 58 : if (rc) {
3392 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
3393 0 : }
3394 :
3395 116 : parent_io->internal.cb(parent_io, parent_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
3396 58 : parent_io->internal.caller_ctx);
3397 58 : }
3398 :
3399 : static void
3400 0 : bdev_io_complete_parent_sequence_cb(void *ctx, int status)
3401 : {
3402 0 : struct spdk_bdev_io *bdev_io = ctx;
3403 :
3404 : /* u.bdev.accel_sequence should have already been cleared at this point */
3405 0 : assert(bdev_io->u.bdev.accel_sequence == NULL);
3406 0 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
3407 0 : bdev_io->internal.f.has_accel_sequence = false;
3408 :
3409 0 : if (spdk_unlikely(status != 0)) {
3410 0 : SPDK_ERRLOG("Failed to execute accel sequence, status=%d\n", status);
3411 0 : }
3412 :
3413 0 : parent_bdev_io_complete(bdev_io, status);
3414 0 : }
3415 :
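: /* Completion callback for every child I/O created by splitting. It decrements the
: * count of outstanding children and, once the last one finishes, either completes
: * the parent I/O (all blocks consumed, or a child failed) or continues splitting
: * the remaining blocks with the type-specific split routine.
: */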
3416 : static void
3417 301 : bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
3418 : {
3419 301 : struct spdk_bdev_io *parent_io = cb_arg;
3420 :
3421 301 : spdk_bdev_free_io(bdev_io);
3422 :
3423 301 : assert(parent_io->internal.f.split);
3424 :
3425 301 : if (!success) {
3426 21 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
3427 : /* If any child I/O failed, stop any further splitting. */
3428 21 : parent_io->internal.split.current_offset_blocks += parent_io->internal.split.remaining_num_blocks;
3429 21 : parent_io->internal.split.remaining_num_blocks = 0;
3430 21 : }
3431 301 : parent_io->internal.split.outstanding--;
3432 301 : if (parent_io->internal.split.outstanding != 0) {
3433 223 : return;
3434 : }
3435 :
3436 : /*
3437 : * Parent I/O finishes when all blocks are consumed.
3438 : */
3439 78 : if (parent_io->internal.split.remaining_num_blocks == 0) {
3440 58 : assert(parent_io->internal.cb != bdev_io_split_done);
3441 58 : bdev_ch_remove_from_io_submitted(parent_io);
3442 58 : spdk_trace_record(TRACE_BDEV_IO_DONE, parent_io->internal.ch->trace_id,
3443 : 0, (uintptr_t)parent_io, bdev_io->internal.caller_ctx,
3444 : parent_io->internal.ch->queue_depth);
3445 :
3446 58 : if (spdk_likely(parent_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS)) {
3447 48 : if (bdev_io_needs_sequence_exec(parent_io->internal.desc, parent_io)) {
3448 0 : bdev_io_exec_sequence(parent_io, bdev_io_complete_parent_sequence_cb);
3449 0 : return;
3450 48 : } else if (parent_io->internal.f.has_bounce_buf &&
3451 0 : !bdev_io_use_accel_sequence(bdev_io)) {
3452 : /* bdev IO will be completed in the callback */
3453 0 : _bdev_io_push_bounce_data_buffer(parent_io, parent_bdev_io_complete);
3454 0 : return;
3455 : }
3456 48 : }
3457 :
3458 58 : parent_bdev_io_complete(parent_io, 0);
3459 58 : return;
3460 : }
3461 :
3462 : /*
3463 : * Continue with the splitting process. This function will complete the parent I/O if the
3464 : * splitting is done.
3465 : */
3466 20 : switch (parent_io->type) {
3467 : case SPDK_BDEV_IO_TYPE_READ:
3468 : case SPDK_BDEV_IO_TYPE_WRITE:
3469 17 : _bdev_rw_split(parent_io);
3470 17 : break;
3471 : case SPDK_BDEV_IO_TYPE_UNMAP:
3472 1 : bdev_unmap_split(parent_io);
3473 1 : break;
3474 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3475 1 : bdev_write_zeroes_split(parent_io);
3476 1 : break;
3477 : case SPDK_BDEV_IO_TYPE_COPY:
3478 1 : bdev_copy_split(parent_io);
3479 1 : break;
3480 : default:
3481 0 : assert(false);
3482 : break;
3483 : }
3484 301 : }
3485 :
3486 : static void bdev_rw_split_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
3487 : bool success);
3488 :
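: /* Entry point for splitting: initialize the split bookkeeping and dispatch to the
: * type-specific split routine. A split read without a data buffer first obtains one
: * and resumes in bdev_rw_split_get_buf_cb().
: */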
3489 : static void
3490 59 : bdev_io_split(struct spdk_bdev_io *bdev_io)
3491 : {
3492 59 : assert(bdev_io_should_split(bdev_io));
3493 59 : assert(bdev_io->internal.f.split);
3494 :
3495 59 : bdev_io->internal.split.current_offset_blocks = bdev_io->u.bdev.offset_blocks;
3496 59 : bdev_io->internal.split.remaining_num_blocks = bdev_io->u.bdev.num_blocks;
3497 59 : bdev_io->internal.split.outstanding = 0;
3498 59 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
3499 :
3500 59 : switch (bdev_io->type) {
3501 : case SPDK_BDEV_IO_TYPE_READ:
3502 : case SPDK_BDEV_IO_TYPE_WRITE:
3503 49 : if (_is_buf_allocated(bdev_io->u.bdev.iovs)) {
3504 49 : _bdev_rw_split(bdev_io);
3505 49 : } else {
3506 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
3507 0 : spdk_bdev_io_get_buf(bdev_io, bdev_rw_split_get_buf_cb,
3508 0 : bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
3509 : }
3510 49 : break;
3511 : case SPDK_BDEV_IO_TYPE_UNMAP:
3512 2 : bdev_unmap_split(bdev_io);
3513 2 : break;
3514 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3515 5 : bdev_write_zeroes_split(bdev_io);
3516 5 : break;
3517 : case SPDK_BDEV_IO_TYPE_COPY:
3518 3 : bdev_copy_split(bdev_io);
3519 3 : break;
3520 : default:
3521 0 : assert(false);
3522 : break;
3523 : }
3524 59 : }
3525 :
3526 : static void
3527 0 : bdev_rw_split_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
3528 : {
3529 0 : if (!success) {
3530 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
3531 0 : return;
3532 : }
3533 :
3534 0 : _bdev_rw_split(bdev_io);
3535 0 : }
3536 :
3537 : static inline void
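: /* Final submission step. The common case (no channel flags set) goes straight to the
: * bdev module. Otherwise the I/O is aborted while a reset is in progress, or routed
: * through the QoS queue when rate limiting is enabled on this channel.
: */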
3538 579 : _bdev_io_submit(struct spdk_bdev_io *bdev_io)
3539 : {
3540 579 : struct spdk_bdev *bdev = bdev_io->bdev;
3541 579 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
3542 :
3543 579 : if (spdk_likely(bdev_ch->flags == 0)) {
3544 554 : bdev_io_do_submit(bdev_ch, bdev_io);
3545 554 : return;
3546 : }
3547 :
3548 25 : if (bdev_ch->flags & BDEV_CH_RESET_IN_PROGRESS) {
3549 2 : _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_ABORTED);
3550 25 : } else if (bdev_ch->flags & BDEV_CH_QOS_ENABLED) {
3551 23 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) &&
3552 2 : bdev_abort_queued_io(&bdev_ch->qos_queued_io, bdev_io->u.abort.bio_to_abort)) {
3553 0 : _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
3554 0 : } else {
3555 23 : TAILQ_INSERT_TAIL(&bdev_ch->qos_queued_io, bdev_io, internal.link);
3556 23 : bdev_qos_io_submit(bdev_ch, bdev->internal.qos);
3557 : }
3558 23 : } else {
3559 0 : SPDK_ERRLOG("unknown bdev_ch flag %x found\n", bdev_ch->flags);
3560 0 : _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
3561 : }
3562 579 : }
3563 :
3564 : bool bdev_lba_range_overlapped(struct lba_range *range1, struct lba_range *range2);
3565 :
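: /* Two LBA ranges overlap unless either is empty or one ends at or before the start
: * of the other (ranges are treated as half-open intervals [offset, offset + length)).
: */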
3566 : bool
3567 23 : bdev_lba_range_overlapped(struct lba_range *range1, struct lba_range *range2)
3568 : {
3569 23 : if (range1->length == 0 || range2->length == 0) {
3570 1 : return false;
3571 : }
3572 :
3573 22 : if (range1->offset + range1->length <= range2->offset) {
3574 1 : return false;
3575 : }
3576 :
3577 21 : if (range2->offset + range2->length <= range1->offset) {
3578 3 : return false;
3579 : }
3580 :
3581 18 : return true;
3582 23 : }
3583 :
3584 : static bool
3585 11 : bdev_io_range_is_locked(struct spdk_bdev_io *bdev_io, struct lba_range *range)
3586 : {
3587 11 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
3588 : struct lba_range r;
3589 :
3590 11 : switch (bdev_io->type) {
3591 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3592 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3593 : /* Don't try to decode the NVMe command - just assume worst-case and that
3594 : * it overlaps a locked range.
3595 : */
3596 0 : return true;
3597 : case SPDK_BDEV_IO_TYPE_READ:
3598 6 : if (!range->quiesce) {
3599 4 : return false;
3600 : }
3601 : /* fallthrough */
3602 : case SPDK_BDEV_IO_TYPE_WRITE:
3603 : case SPDK_BDEV_IO_TYPE_UNMAP:
3604 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3605 : case SPDK_BDEV_IO_TYPE_ZCOPY:
3606 : case SPDK_BDEV_IO_TYPE_COPY:
3607 7 : r.offset = bdev_io->u.bdev.offset_blocks;
3608 7 : r.length = bdev_io->u.bdev.num_blocks;
3609 7 : if (!bdev_lba_range_overlapped(range, &r)) {
3610 : /* This I/O doesn't overlap the specified LBA range. */
3611 0 : return false;
3612 7 : } else if (range->owner_ch == ch && range->locked_ctx == bdev_io->internal.caller_ctx) {
3613 : /* This I/O overlaps, but the I/O is on the same channel that locked this
3614 : * range, and the caller_ctx is the same as the locked_ctx. This means
3615 : * that this I/O is associated with the lock, and is allowed to execute.
3616 : */
3617 2 : return false;
3618 : } else {
3619 5 : return true;
3620 : }
3621 : default:
3622 0 : return false;
3623 : }
3624 11 : }
3625 :
3626 : void
3627 639 : bdev_io_submit(struct spdk_bdev_io *bdev_io)
3628 : {
3629 639 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
3630 :
3631 639 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
3632 :
3633 639 : if (!TAILQ_EMPTY(&ch->locked_ranges)) {
3634 : struct lba_range *range;
3635 :
3636 13 : TAILQ_FOREACH(range, &ch->locked_ranges, tailq) {
3637 8 : if (bdev_io_range_is_locked(bdev_io, range)) {
3638 3 : TAILQ_INSERT_TAIL(&ch->io_locked, bdev_io, internal.ch_link);
3639 3 : return;
3640 : }
3641 5 : }
3642 5 : }
3643 :
3644 636 : bdev_ch_add_to_io_submitted(bdev_io);
3645 :
3646 636 : bdev_io->internal.submit_tsc = spdk_get_ticks();
3647 636 : spdk_trace_record_tsc(bdev_io->internal.submit_tsc, TRACE_BDEV_IO_START,
3648 : ch->trace_id, bdev_io->u.bdev.num_blocks,
3649 : (uintptr_t)bdev_io, (uint64_t)bdev_io->type, bdev_io->internal.caller_ctx,
3650 : bdev_io->u.bdev.offset_blocks, ch->queue_depth);
3651 :
3652 636 : if (bdev_io->internal.f.split) {
3653 59 : bdev_io_split(bdev_io);
3654 59 : return;
3655 : }
3656 :
3657 577 : _bdev_io_submit(bdev_io);
3658 639 : }
3659 :
3660 : static inline void
3661 4 : _bdev_io_ext_use_bounce_buffer(struct spdk_bdev_io *bdev_io)
3662 : {
3663 : /* The bdev doesn't support memory domains, so the buffers in this IO request can't
3664 : * be accessed directly. Local buffers must be allocated before issuing the IO operation.
3665 : * For a write operation we need to pull the buffers from the memory domain before submitting the IO.
3666 : * Once a read operation completes, we need to use the memory domain push functionality to
3667 : * update the data in the original memory domain IO buffer.
3668 : * This IO request will go through a regular IO flow, so clear the memory domain pointers. */
3669 4 : assert(bdev_io->internal.f.has_memory_domain);
3670 4 : bdev_io->u.bdev.memory_domain = NULL;
3671 4 : bdev_io->u.bdev.memory_domain_ctx = NULL;
3672 8 : _bdev_memory_domain_io_get_buf(bdev_io, _bdev_memory_domain_get_io_cb,
3673 4 : bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
3674 4 : }
3675 :
3676 : static inline void
3677 292 : _bdev_io_submit_ext(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bdev_io)
3678 : {
3679 292 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
3680 292 : bool needs_exec = bdev_io_needs_sequence_exec(desc, bdev_io);
3681 :
3682 292 : if (spdk_unlikely(ch->flags & BDEV_CH_RESET_IN_PROGRESS)) {
3683 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_ABORTED;
3684 0 : bdev_io_complete_unsubmitted(bdev_io);
3685 0 : return;
3686 : }
3687 :
3688 : /* We need to allocate a bounce buffer if the bdev doesn't support memory domains, or if it does
3689 : * support them, but we need to execute an accel sequence and the data buffer is from the accel
3690 : * memory domain (to avoid doing a push/pull from that domain).
3691 : */
3692 292 : if (bdev_io_use_memory_domain(bdev_io)) {
3693 4 : if (!desc->memory_domains_supported ||
3694 0 : (needs_exec && bdev_io->internal.memory_domain == spdk_accel_get_memory_domain())) {
3695 4 : _bdev_io_ext_use_bounce_buffer(bdev_io);
3696 4 : return;
3697 : }
3698 0 : }
3699 :
3700 288 : if (needs_exec) {
3701 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
3702 0 : bdev_io_exec_sequence(bdev_io, bdev_io_submit_sequence_cb);
3703 0 : return;
3704 : }
3705 : /* For reads we'll execute the sequence after the data is read, so, for now, only
3706 : * clear out the accel_sequence pointer and submit the IO. */
3707 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
3708 0 : bdev_io->u.bdev.accel_sequence = NULL;
3709 0 : }
3710 :
3711 288 : bdev_io_submit(bdev_io);
3712 292 : }
3713 :
3714 : static void
3715 11 : bdev_io_submit_reset(struct spdk_bdev_io *bdev_io)
3716 : {
3717 11 : struct spdk_bdev *bdev = bdev_io->bdev;
3718 11 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
3719 11 : struct spdk_io_channel *ch = bdev_ch->channel;
3720 :
3721 11 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
3722 :
3723 11 : bdev_io->internal.f.in_submit_request = true;
3724 11 : bdev_submit_request(bdev, ch, bdev_io);
3725 11 : bdev_io->internal.f.in_submit_request = false;
3726 11 : }
3727 :
3728 : void
3729 691 : bdev_io_init(struct spdk_bdev_io *bdev_io,
3730 : struct spdk_bdev *bdev, void *cb_arg,
3731 : spdk_bdev_io_completion_cb cb)
3732 : {
3733 691 : bdev_io->bdev = bdev;
3734 691 : bdev_io->internal.f.raw = 0;
3735 691 : bdev_io->internal.caller_ctx = cb_arg;
3736 691 : bdev_io->internal.cb = cb;
3737 691 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
3738 691 : bdev_io->internal.f.in_submit_request = false;
3739 691 : bdev_io->internal.error.nvme.cdw0 = 0;
3740 691 : bdev_io->num_retries = 0;
3741 691 : bdev_io->internal.get_buf_cb = NULL;
3742 691 : bdev_io->internal.get_aux_buf_cb = NULL;
3743 691 : bdev_io->internal.data_transfer_cpl = NULL;
3744 691 : bdev_io->internal.f.split = bdev_io_should_split(bdev_io);
3745 691 : }
3746 :
3747 : static bool
3748 520 : bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
3749 : {
3750 520 : return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
3751 : }
3752 :
3753 : bool
3754 170 : spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
3755 : {
3756 : bool supported;
3757 :
3758 170 : supported = bdev_io_type_supported(bdev, io_type);
3759 :
3760 170 : if (!supported) {
3761 7 : switch (io_type) {
3762 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3763 : /* The bdev layer will emulate write zeroes as long as write is supported. */
3764 0 : supported = bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE);
3765 0 : break;
3766 : default:
3767 7 : break;
3768 : }
3769 7 : }
3770 :
3771 170 : return supported;
3772 : }
3773 :
3774 : static const char *g_io_type_strings[] = {
3775 : [SPDK_BDEV_IO_TYPE_READ] = "read",
3776 : [SPDK_BDEV_IO_TYPE_WRITE] = "write",
3777 : [SPDK_BDEV_IO_TYPE_UNMAP] = "unmap",
3778 : [SPDK_BDEV_IO_TYPE_FLUSH] = "flush",
3779 : [SPDK_BDEV_IO_TYPE_RESET] = "reset",
3780 : [SPDK_BDEV_IO_TYPE_NVME_ADMIN] = "nvme_admin",
3781 : [SPDK_BDEV_IO_TYPE_NVME_IO] = "nvme_io",
3782 : [SPDK_BDEV_IO_TYPE_NVME_IO_MD] = "nvme_io_md",
3783 : [SPDK_BDEV_IO_TYPE_WRITE_ZEROES] = "write_zeroes",
3784 : [SPDK_BDEV_IO_TYPE_ZCOPY] = "zcopy",
3785 : [SPDK_BDEV_IO_TYPE_GET_ZONE_INFO] = "get_zone_info",
3786 : [SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT] = "zone_management",
3787 : [SPDK_BDEV_IO_TYPE_ZONE_APPEND] = "zone_append",
3788 : [SPDK_BDEV_IO_TYPE_COMPARE] = "compare",
3789 : [SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE] = "compare_and_write",
3790 : [SPDK_BDEV_IO_TYPE_ABORT] = "abort",
3791 : [SPDK_BDEV_IO_TYPE_SEEK_HOLE] = "seek_hole",
3792 : [SPDK_BDEV_IO_TYPE_SEEK_DATA] = "seek_data",
3793 : [SPDK_BDEV_IO_TYPE_COPY] = "copy",
3794 : [SPDK_BDEV_IO_TYPE_NVME_IOV_MD] = "nvme_iov_md",
3795 : };
3796 :
3797 : const char *
3798 0 : spdk_bdev_get_io_type_name(enum spdk_bdev_io_type io_type)
3799 : {
3800 0 : if (io_type <= SPDK_BDEV_IO_TYPE_INVALID || io_type >= SPDK_BDEV_NUM_IO_TYPES) {
3801 0 : return NULL;
3802 : }
3803 :
3804 0 : return g_io_type_strings[io_type];
3805 0 : }
3806 :
3807 : int
3808 0 : spdk_bdev_get_io_type(const char *io_type_string)
3809 : {
3810 : int i;
3811 :
3812 0 : for (i = SPDK_BDEV_IO_TYPE_READ; i < SPDK_BDEV_NUM_IO_TYPES; ++i) {
3813 0 : if (!strcmp(io_type_string, g_io_type_strings[i])) {
3814 0 : return i;
3815 : }
3816 0 : }
3817 :
3818 0 : return -1;
3819 0 : }
3820 :
3821 : uint64_t
3822 0 : spdk_bdev_io_get_submit_tsc(struct spdk_bdev_io *bdev_io)
3823 : {
3824 0 : return bdev_io->internal.submit_tsc;
3825 : }
3826 :
3827 : int
3828 0 : spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
3829 : {
3830 0 : if (bdev->fn_table->dump_info_json) {
3831 0 : return bdev->fn_table->dump_info_json(bdev->ctxt, w);
3832 : }
3833 :
3834 0 : return 0;
3835 0 : }
3836 :
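: /* Recompute each rate limit's per-timeslice quota from its per-second limit (never
: * below the configured per-timeslice minimum) and reset the budget remaining in the
: * current timeslice.
: */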
3837 : static void
3838 10 : bdev_qos_update_max_quota_per_timeslice(struct spdk_bdev_qos *qos)
3839 : {
3840 10 : uint32_t max_per_timeslice = 0;
3841 : int i;
3842 :
3843 50 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
3844 40 : if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
3845 15 : qos->rate_limits[i].max_per_timeslice = 0;
3846 15 : continue;
3847 : }
3848 :
3849 50 : max_per_timeslice = qos->rate_limits[i].limit *
3850 25 : SPDK_BDEV_QOS_TIMESLICE_IN_USEC / SPDK_SEC_TO_USEC;
3851 :
3852 25 : qos->rate_limits[i].max_per_timeslice = spdk_max(max_per_timeslice,
3853 : qos->rate_limits[i].min_per_timeslice);
3854 :
3855 50 : __atomic_store_n(&qos->rate_limits[i].remaining_this_timeslice,
3856 25 : qos->rate_limits[i].max_per_timeslice, __ATOMIC_RELEASE);
3857 25 : }
3858 :
3859 10 : bdev_qos_set_ops(qos);
3860 10 : }
3861 :
3862 : static void
3863 4 : bdev_channel_submit_qos_io(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
3864 : struct spdk_io_channel *io_ch, void *ctx)
3865 : {
3866 4 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
3867 : int status;
3868 :
3869 4 : bdev_qos_io_submit(bdev_ch, bdev->internal.qos);
3870 :
3871 : /* if all IOs were sent then continue the iteration, otherwise - stop it */
3872 : /* TODO: channels round robing */
3873 4 : status = TAILQ_EMPTY(&bdev_ch->qos_queued_io) ? 0 : 1;
3874 :
3875 4 : spdk_bdev_for_each_channel_continue(i, status);
3876 4 : }
3877 :
3878 :
3879 : static void
3880 2 : bdev_channel_submit_qos_io_done(struct spdk_bdev *bdev, void *ctx, int status)
3881 : {
3882 :
3883 2 : }
3884 :
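: /* QoS poller, running on the thread that owns the QoS channel. Once per timeslice it
: * carries any overrun from the previous timeslice into the new budget, refills each
: * rate limit's quota and resubmits the queued QoS I/O on every channel of the bdev.
: */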
3885 : static int
3886 3 : bdev_channel_poll_qos(void *arg)
3887 : {
3888 3 : struct spdk_bdev *bdev = arg;
3889 3 : struct spdk_bdev_qos *qos = bdev->internal.qos;
3890 3 : uint64_t now = spdk_get_ticks();
3891 : int i;
3892 : int64_t remaining_last_timeslice;
3893 :
3894 3 : if (spdk_unlikely(qos->thread == NULL)) {
3895 : /* Old QoS was unbound to remove and new QoS is not enabled yet. */
3896 1 : return SPDK_POLLER_IDLE;
3897 : }
3898 :
3899 2 : if (now < (qos->last_timeslice + qos->timeslice_size)) {
3900 : /* We received our callback earlier than expected - return
3901 : * immediately and wait to do accounting until at least one
3902 : * timeslice has actually expired. This should never happen
3903 : * with a well-behaved timer implementation.
3904 : */
3905 0 : return SPDK_POLLER_IDLE;
3906 : }
3907 :
3908 : /* Reset for next round of rate limiting */
3909 10 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
3910 : /* We may have allowed the IOs or bytes to slightly overrun in the last
3911 : * timeslice. remaining_this_timeslice is signed, so if it's negative
3912 : * here, we'll account for the overrun so that the next timeslice will
3913 : * be appropriately reduced.
3914 : */
3915 8 : remaining_last_timeslice = __atomic_exchange_n(&qos->rate_limits[i].remaining_this_timeslice,
3916 : 0, __ATOMIC_RELAXED);
3917 8 : if (remaining_last_timeslice < 0) {
3918 : /* There could be a race condition here as both bdev_qos_rw_queue_io() and bdev_channel_poll_qos()
3919 : * potentially use 2 atomic ops each, so they can intertwine.
3920 : * This race can potentially cause the limits to be a little fuzzy but won't cause any real damage.
3921 : */
3922 0 : __atomic_store_n(&qos->rate_limits[i].remaining_this_timeslice,
3923 0 : remaining_last_timeslice, __ATOMIC_RELAXED);
3924 0 : }
3925 8 : }
3926 :
3927 4 : while (now >= (qos->last_timeslice + qos->timeslice_size)) {
3928 2 : qos->last_timeslice += qos->timeslice_size;
3929 10 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
3930 16 : __atomic_add_fetch(&qos->rate_limits[i].remaining_this_timeslice,
3931 8 : qos->rate_limits[i].max_per_timeslice, __ATOMIC_RELAXED);
3932 8 : }
3933 : }
3934 :
3935 2 : spdk_bdev_for_each_channel(bdev, bdev_channel_submit_qos_io, qos,
3936 : bdev_channel_submit_qos_io_done);
3937 :
3938 2 : return SPDK_POLLER_BUSY;
3939 3 : }
3940 :
3941 : static void
3942 74 : bdev_channel_destroy_resource(struct spdk_bdev_channel *ch)
3943 : {
3944 : struct spdk_bdev_shared_resource *shared_resource;
3945 : struct lba_range *range;
3946 :
3947 74 : bdev_free_io_stat(ch->stat);
3948 : #ifdef SPDK_CONFIG_VTUNE
3949 : bdev_free_io_stat(ch->prev_stat);
3950 : #endif
3951 :
3952 74 : while (!TAILQ_EMPTY(&ch->locked_ranges)) {
3953 0 : range = TAILQ_FIRST(&ch->locked_ranges);
3954 0 : TAILQ_REMOVE(&ch->locked_ranges, range, tailq);
3955 0 : free(range);
3956 : }
3957 :
3958 74 : spdk_put_io_channel(ch->channel);
3959 74 : spdk_put_io_channel(ch->accel_channel);
3960 :
3961 74 : shared_resource = ch->shared_resource;
3962 :
3963 74 : assert(TAILQ_EMPTY(&ch->io_locked));
3964 74 : assert(TAILQ_EMPTY(&ch->io_submitted));
3965 74 : assert(TAILQ_EMPTY(&ch->io_accel_exec));
3966 74 : assert(TAILQ_EMPTY(&ch->io_memory_domain));
3967 74 : assert(ch->io_outstanding == 0);
3968 74 : assert(shared_resource->ref > 0);
3969 74 : shared_resource->ref--;
3970 74 : if (shared_resource->ref == 0) {
3971 73 : assert(shared_resource->io_outstanding == 0);
3972 73 : TAILQ_REMOVE(&shared_resource->mgmt_ch->shared_resources, shared_resource, link);
3973 73 : spdk_put_io_channel(spdk_io_channel_from_ctx(shared_resource->mgmt_ch));
3974 73 : spdk_poller_unregister(&shared_resource->nomem_poller);
3975 73 : free(shared_resource);
3976 73 : }
3977 74 : }
3978 :
3979 : static void
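: /* Called with the bdev spinlock held. If QoS is configured and no channel owns it yet,
: * make this channel the QoS channel: take an extra reference on the bdev I/O channel,
: * initialize the per-timeslice minimums and quotas, and register the QoS poller on the
: * current thread. Every channel of a QoS-enabled bdev gets the BDEV_CH_QOS_ENABLED flag.
: */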
3980 83 : bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch)
3981 : {
3982 83 : struct spdk_bdev_qos *qos = bdev->internal.qos;
3983 : int i;
3984 :
3985 83 : assert(spdk_spin_held(&bdev->internal.spinlock));
3986 :
3987 : /* Rate limiting on this bdev enabled */
3988 83 : if (qos) {
3989 17 : if (qos->ch == NULL) {
3990 : struct spdk_io_channel *io_ch;
3991 :
3992 9 : SPDK_DEBUGLOG(bdev, "Selecting channel %p as QoS channel for bdev %s on thread %p\n", ch,
3993 : bdev->name, spdk_get_thread());
3994 :
3995 : /* No qos channel has been selected, so set one up */
3996 :
3997 : /* Take another reference to ch */
3998 9 : io_ch = spdk_get_io_channel(__bdev_to_io_dev(bdev));
3999 9 : assert(io_ch != NULL);
4000 9 : qos->ch = ch;
4001 :
4002 9 : qos->thread = spdk_io_channel_get_thread(io_ch);
4003 :
4004 45 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
4005 36 : if (bdev_qos_is_iops_rate_limit(i) == true) {
4006 9 : qos->rate_limits[i].min_per_timeslice =
4007 : SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE;
4008 9 : } else {
4009 27 : qos->rate_limits[i].min_per_timeslice =
4010 : SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE;
4011 : }
4012 :
4013 36 : if (qos->rate_limits[i].limit == 0) {
4014 2 : qos->rate_limits[i].limit = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
4015 2 : }
4016 36 : }
4017 9 : bdev_qos_update_max_quota_per_timeslice(qos);
4018 9 : qos->timeslice_size =
4019 9 : SPDK_BDEV_QOS_TIMESLICE_IN_USEC * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
4020 9 : qos->last_timeslice = spdk_get_ticks();
4021 9 : qos->poller = SPDK_POLLER_REGISTER(bdev_channel_poll_qos,
4022 : bdev,
4023 : SPDK_BDEV_QOS_TIMESLICE_IN_USEC);
4024 9 : }
4025 :
4026 17 : ch->flags |= BDEV_CH_QOS_ENABLED;
4027 17 : }
4028 83 : }
4029 :
4030 : struct poll_timeout_ctx {
4031 : struct spdk_bdev_desc *desc;
4032 : uint64_t timeout_in_sec;
4033 : spdk_bdev_io_timeout_cb cb_fn;
4034 : void *cb_arg;
4035 : };
4036 :
4037 : static void
4038 267 : bdev_desc_free(struct spdk_bdev_desc *desc)
4039 : {
4040 267 : spdk_spin_destroy(&desc->spinlock);
4041 267 : free(desc->media_events_buffer);
4042 267 : free(desc);
4043 267 : }
4044 :
4045 : static void
4046 8 : bdev_channel_poll_timeout_io_done(struct spdk_bdev *bdev, void *_ctx, int status)
4047 : {
4048 8 : struct poll_timeout_ctx *ctx = _ctx;
4049 8 : struct spdk_bdev_desc *desc = ctx->desc;
4050 :
4051 8 : free(ctx);
4052 :
4053 8 : spdk_spin_lock(&desc->spinlock);
4054 8 : desc->refs--;
4055 8 : if (desc->closed == true && desc->refs == 0) {
4056 1 : spdk_spin_unlock(&desc->spinlock);
4057 1 : bdev_desc_free(desc);
4058 1 : return;
4059 : }
4060 7 : spdk_spin_unlock(&desc->spinlock);
4061 8 : }
4062 :
4063 : static void
4064 13 : bdev_channel_poll_timeout_io(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
4065 : struct spdk_io_channel *io_ch, void *_ctx)
4066 : {
4067 13 : struct poll_timeout_ctx *ctx = _ctx;
4068 13 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
4069 13 : struct spdk_bdev_desc *desc = ctx->desc;
4070 : struct spdk_bdev_io *bdev_io;
4071 : uint64_t now;
4072 :
4073 13 : spdk_spin_lock(&desc->spinlock);
4074 13 : if (desc->closed == true) {
4075 1 : spdk_spin_unlock(&desc->spinlock);
4076 1 : spdk_bdev_for_each_channel_continue(i, -1);
4077 1 : return;
4078 : }
4079 12 : spdk_spin_unlock(&desc->spinlock);
4080 :
4081 12 : now = spdk_get_ticks();
4082 22 : TAILQ_FOREACH(bdev_io, &bdev_ch->io_submitted, internal.ch_link) {
4083 : /* Exclude any I/O that are generated via splitting. */
4084 15 : if (bdev_io->internal.cb == bdev_io_split_done) {
4085 3 : continue;
4086 : }
4087 :
4088 : /* Once we find an I/O that has not timed out, we can immediately
4089 : * exit the loop.
4090 : */
4091 24 : if (now < (bdev_io->internal.submit_tsc +
4092 12 : ctx->timeout_in_sec * spdk_get_ticks_hz())) {
4093 5 : goto end;
4094 : }
4095 :
4096 7 : if (bdev_io->internal.desc == desc) {
4097 7 : ctx->cb_fn(ctx->cb_arg, bdev_io);
4098 7 : }
4099 14 : }
4100 :
4101 : end:
4102 12 : spdk_bdev_for_each_channel_continue(i, 0);
4103 13 : }
4104 :
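: /* Timeout poller registered by spdk_bdev_set_timeout(). It takes a reference on the
: * descriptor and walks every channel, invoking the user callback for each I/O that was
: * submitted through this descriptor and has been outstanding longer than timeout_in_sec.
: */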
4105 : static int
4106 8 : bdev_poll_timeout_io(void *arg)
4107 : {
4108 8 : struct spdk_bdev_desc *desc = arg;
4109 8 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
4110 : struct poll_timeout_ctx *ctx;
4111 :
4112 8 : ctx = calloc(1, sizeof(struct poll_timeout_ctx));
4113 8 : if (!ctx) {
4114 0 : SPDK_ERRLOG("failed to allocate memory\n");
4115 0 : return SPDK_POLLER_BUSY;
4116 : }
4117 8 : ctx->desc = desc;
4118 8 : ctx->cb_arg = desc->cb_arg;
4119 8 : ctx->cb_fn = desc->cb_fn;
4120 8 : ctx->timeout_in_sec = desc->timeout_in_sec;
4121 :
4122 : /* Take a ref on the descriptor in case it gets closed while we are checking
4123 : * all of the channels.
4124 : */
4125 8 : spdk_spin_lock(&desc->spinlock);
4126 8 : desc->refs++;
4127 8 : spdk_spin_unlock(&desc->spinlock);
4128 :
4129 8 : spdk_bdev_for_each_channel(bdev, bdev_channel_poll_timeout_io, ctx,
4130 : bdev_channel_poll_timeout_io_done);
4131 :
4132 8 : return SPDK_POLLER_BUSY;
4133 8 : }
4134 :
4135 : int
4136 5 : spdk_bdev_set_timeout(struct spdk_bdev_desc *desc, uint64_t timeout_in_sec,
4137 : spdk_bdev_io_timeout_cb cb_fn, void *cb_arg)
4138 : {
4139 5 : assert(desc->thread == spdk_get_thread());
4140 :
4141 5 : spdk_poller_unregister(&desc->io_timeout_poller);
4142 :
4143 5 : if (timeout_in_sec) {
4144 4 : assert(cb_fn != NULL);
4145 4 : desc->io_timeout_poller = SPDK_POLLER_REGISTER(bdev_poll_timeout_io,
4146 : desc,
4147 : SPDK_BDEV_IO_POLL_INTERVAL_IN_MSEC * SPDK_SEC_TO_USEC /
4148 : 1000);
4149 4 : if (desc->io_timeout_poller == NULL) {
4150 0 : SPDK_ERRLOG("can not register the desc timeout IO poller\n");
4151 0 : return -1;
4152 : }
4153 4 : }
4154 :
4155 5 : desc->cb_fn = cb_fn;
4156 5 : desc->cb_arg = cb_arg;
4157 5 : desc->timeout_in_sec = timeout_in_sec;
4158 :
4159 5 : return 0;
4160 5 : }
4161 :
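: /* I/O channel constructor. Obtains the module's underlying channel and an accel channel,
: * then shares one spdk_bdev_shared_resource (nomem queue, outstanding I/O accounting)
: * among all bdev channels that sit on the same underlying channel. Also enables QoS if
: * configured and copies any currently locked LBA ranges into the new channel.
: */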
4162 : static int
4163 76 : bdev_channel_create(void *io_device, void *ctx_buf)
4164 : {
4165 76 : struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
4166 76 : struct spdk_bdev_channel *ch = ctx_buf;
4167 : struct spdk_io_channel *mgmt_io_ch;
4168 : struct spdk_bdev_mgmt_channel *mgmt_ch;
4169 : struct spdk_bdev_shared_resource *shared_resource;
4170 : struct lba_range *range;
4171 :
4172 76 : ch->bdev = bdev;
4173 76 : ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
4174 76 : if (!ch->channel) {
4175 2 : return -1;
4176 : }
4177 :
4178 74 : ch->accel_channel = spdk_accel_get_io_channel();
4179 74 : if (!ch->accel_channel) {
4180 0 : spdk_put_io_channel(ch->channel);
4181 0 : return -1;
4182 : }
4183 :
4184 74 : spdk_trace_record(TRACE_BDEV_IOCH_CREATE, bdev->internal.trace_id, 0, 0,
4185 : spdk_thread_get_id(spdk_io_channel_get_thread(ch->channel)));
4186 :
4187 74 : assert(ch->histogram == NULL);
4188 74 : if (bdev->internal.histogram_enabled) {
4189 0 : ch->histogram = spdk_histogram_data_alloc();
4190 0 : if (ch->histogram == NULL) {
4191 0 : SPDK_ERRLOG("Could not allocate histogram\n");
4192 0 : }
4193 0 : }
4194 :
4195 74 : mgmt_io_ch = spdk_get_io_channel(&g_bdev_mgr);
4196 74 : if (!mgmt_io_ch) {
4197 0 : spdk_put_io_channel(ch->channel);
4198 0 : spdk_put_io_channel(ch->accel_channel);
4199 0 : return -1;
4200 : }
4201 :
4202 74 : mgmt_ch = __io_ch_to_bdev_mgmt_ch(mgmt_io_ch);
4203 76 : TAILQ_FOREACH(shared_resource, &mgmt_ch->shared_resources, link) {
4204 3 : if (shared_resource->shared_ch == ch->channel) {
4205 1 : spdk_put_io_channel(mgmt_io_ch);
4206 1 : shared_resource->ref++;
4207 1 : break;
4208 : }
4209 2 : }
4210 :
4211 74 : if (shared_resource == NULL) {
4212 73 : shared_resource = calloc(1, sizeof(*shared_resource));
4213 73 : if (shared_resource == NULL) {
4214 0 : spdk_put_io_channel(ch->channel);
4215 0 : spdk_put_io_channel(ch->accel_channel);
4216 0 : spdk_put_io_channel(mgmt_io_ch);
4217 0 : return -1;
4218 : }
4219 :
4220 73 : shared_resource->mgmt_ch = mgmt_ch;
4221 73 : shared_resource->io_outstanding = 0;
4222 73 : TAILQ_INIT(&shared_resource->nomem_io);
4223 73 : shared_resource->nomem_threshold = 0;
4224 73 : shared_resource->shared_ch = ch->channel;
4225 73 : shared_resource->ref = 1;
4226 73 : TAILQ_INSERT_TAIL(&mgmt_ch->shared_resources, shared_resource, link);
4227 73 : }
4228 :
4229 74 : ch->io_outstanding = 0;
4230 74 : TAILQ_INIT(&ch->queued_resets);
4231 74 : TAILQ_INIT(&ch->locked_ranges);
4232 74 : TAILQ_INIT(&ch->qos_queued_io);
4233 74 : ch->flags = 0;
4234 74 : ch->trace_id = bdev->internal.trace_id;
4235 74 : ch->shared_resource = shared_resource;
4236 :
4237 74 : TAILQ_INIT(&ch->io_submitted);
4238 74 : TAILQ_INIT(&ch->io_locked);
4239 74 : TAILQ_INIT(&ch->io_accel_exec);
4240 74 : TAILQ_INIT(&ch->io_memory_domain);
4241 :
4242 74 : ch->stat = bdev_alloc_io_stat(false);
4243 74 : if (ch->stat == NULL) {
4244 0 : bdev_channel_destroy_resource(ch);
4245 0 : return -1;
4246 : }
4247 :
4248 74 : ch->stat->ticks_rate = spdk_get_ticks_hz();
4249 :
4250 : #ifdef SPDK_CONFIG_VTUNE
4251 : {
4252 : char *name;
4253 : __itt_init_ittlib(NULL, 0);
4254 : name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch);
4255 : if (!name) {
4256 : bdev_channel_destroy_resource(ch);
4257 : return -1;
4258 : }
4259 : ch->handle = __itt_string_handle_create(name);
4260 : free(name);
4261 : ch->start_tsc = spdk_get_ticks();
4262 : ch->interval_tsc = spdk_get_ticks_hz() / 100;
4263 : ch->prev_stat = bdev_alloc_io_stat(false);
4264 : if (ch->prev_stat == NULL) {
4265 : bdev_channel_destroy_resource(ch);
4266 : return -1;
4267 : }
4268 : }
4269 : #endif
4270 :
4271 74 : spdk_spin_lock(&bdev->internal.spinlock);
4272 74 : bdev_enable_qos(bdev, ch);
4273 :
4274 75 : TAILQ_FOREACH(range, &bdev->internal.locked_ranges, tailq) {
4275 : struct lba_range *new_range;
4276 :
4277 1 : new_range = calloc(1, sizeof(*new_range));
4278 1 : if (new_range == NULL) {
4279 0 : spdk_spin_unlock(&bdev->internal.spinlock);
4280 0 : bdev_channel_destroy_resource(ch);
4281 0 : return -1;
4282 : }
4283 1 : new_range->length = range->length;
4284 1 : new_range->offset = range->offset;
4285 1 : new_range->locked_ctx = range->locked_ctx;
4286 1 : TAILQ_INSERT_TAIL(&ch->locked_ranges, new_range, tailq);
4287 1 : }
4288 :
4289 74 : spdk_spin_unlock(&bdev->internal.spinlock);
4290 :
4291 74 : return 0;
4292 76 : }
4293 :
4294 : static int
4295 0 : bdev_abort_all_buf_io_cb(struct spdk_iobuf_channel *ch, struct spdk_iobuf_entry *entry,
4296 : void *cb_ctx)
4297 : {
4298 0 : struct spdk_bdev_channel *bdev_ch = cb_ctx;
4299 : struct spdk_bdev_io *bdev_io;
4300 : uint64_t buf_len;
4301 :
4302 0 : bdev_io = SPDK_CONTAINEROF(entry, struct spdk_bdev_io, internal.iobuf);
4303 0 : if (bdev_io->internal.ch == bdev_ch) {
4304 0 : buf_len = bdev_io_get_max_buf_len(bdev_io, bdev_io->internal.buf.len);
4305 0 : spdk_iobuf_entry_abort(ch, entry, buf_len);
4306 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED);
4307 0 : }
4308 :
4309 0 : return 0;
4310 : }
4311 :
4312 : /*
4313 : * Abort I/O that are waiting on a data buffer.
4314 : */
4315 : static void
4316 96 : bdev_abort_all_buf_io(struct spdk_bdev_mgmt_channel *mgmt_ch, struct spdk_bdev_channel *ch)
4317 : {
4318 96 : spdk_iobuf_for_each_entry(&mgmt_ch->iobuf, bdev_abort_all_buf_io_cb, ch);
4319 96 : }
4320 :
4321 : /*
4322 : * Abort I/O that are queued waiting for submission. These types of I/O are
4323 : * linked using the spdk_bdev_io link TAILQ_ENTRY.
4324 : */
4325 : static void
4326 188 : bdev_abort_all_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
4327 : {
4328 : struct spdk_bdev_io *bdev_io, *tmp;
4329 :
4330 228 : TAILQ_FOREACH_SAFE(bdev_io, queue, internal.link, tmp) {
4331 40 : if (bdev_io->internal.ch == ch) {
4332 40 : TAILQ_REMOVE(queue, bdev_io, internal.link);
4333 : /*
4334 : * spdk_bdev_io_complete() assumes that the completed I/O had
4335 : * been submitted to the bdev module. Since in this case it
4336 : * hadn't, bump io_outstanding to account for the decrement
4337 : * that spdk_bdev_io_complete() will do.
4338 : */
4339 40 : if (bdev_io->type != SPDK_BDEV_IO_TYPE_RESET) {
4340 39 : bdev_io_increment_outstanding(ch, ch->shared_resource);
4341 39 : }
4342 40 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED);
4343 40 : }
4344 40 : }
4345 188 : }
4346 :
4347 : static bool
4348 18 : bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_io *bio_to_abort)
4349 : {
4350 : struct spdk_bdev_io *bdev_io;
4351 :
4352 18 : TAILQ_FOREACH(bdev_io, queue, internal.link) {
4353 0 : if (bdev_io == bio_to_abort) {
4354 0 : TAILQ_REMOVE(queue, bio_to_abort, internal.link);
4355 0 : spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED);
4356 0 : return true;
4357 : }
4358 0 : }
4359 :
4360 18 : return false;
4361 18 : }
4362 :
4363 : static int
4364 0 : bdev_abort_buf_io_cb(struct spdk_iobuf_channel *ch, struct spdk_iobuf_entry *entry, void *cb_ctx)
4365 : {
4366 0 : struct spdk_bdev_io *bdev_io, *bio_to_abort = cb_ctx;
4367 : uint64_t buf_len;
4368 :
4369 0 : bdev_io = SPDK_CONTAINEROF(entry, struct spdk_bdev_io, internal.iobuf);
4370 0 : if (bdev_io == bio_to_abort) {
4371 0 : buf_len = bdev_io_get_max_buf_len(bdev_io, bdev_io->internal.buf.len);
4372 0 : spdk_iobuf_entry_abort(ch, entry, buf_len);
4373 0 : spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED);
4374 0 : return 1;
4375 : }
4376 :
4377 0 : return 0;
4378 0 : }
4379 :
4380 : static bool
4381 16 : bdev_abort_buf_io(struct spdk_bdev_mgmt_channel *mgmt_ch, struct spdk_bdev_io *bio_to_abort)
4382 : {
4383 : int rc;
4384 :
4385 16 : rc = spdk_iobuf_for_each_entry(&mgmt_ch->iobuf, bdev_abort_buf_io_cb, bio_to_abort);
4386 16 : return rc == 1;
4387 : }
4388 :
4389 : static void
4390 7 : bdev_qos_channel_destroy(void *cb_arg)
4391 : {
4392 7 : struct spdk_bdev_qos *qos = cb_arg;
4393 :
4394 7 : spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
4395 7 : spdk_poller_unregister(&qos->poller);
4396 :
4397 7 : SPDK_DEBUGLOG(bdev, "Free QoS %p.\n", qos);
4398 :
4399 7 : free(qos);
4400 7 : }
4401 :
4402 : static int
4403 7 : bdev_qos_destroy(struct spdk_bdev *bdev)
4404 : {
4405 : int i;
4406 :
4407 : /*
4408 : * Cleanly shutting down the QoS poller is tricky, because
4409 : * during the asynchronous operation the user could open
4410 : * a new descriptor and create a new channel, spawning
4411 : * a new QoS poller.
4412 : *
4413 : * The strategy is to create a new QoS structure here and swap it
4414 : * in. The shutdown path then continues to refer to the old one
4415 : * until it completes and then releases it.
4416 : */
4417 : struct spdk_bdev_qos *new_qos, *old_qos;
4418 :
4419 7 : old_qos = bdev->internal.qos;
4420 :
4421 7 : new_qos = calloc(1, sizeof(*new_qos));
4422 7 : if (!new_qos) {
4423 0 : SPDK_ERRLOG("Unable to allocate memory to shut down QoS.\n");
4424 0 : return -ENOMEM;
4425 : }
4426 :
4427 : /* Copy the old QoS data into the newly allocated structure */
4428 7 : memcpy(new_qos, old_qos, sizeof(*new_qos));
4429 :
4430 : /* Zero out the key parts of the QoS structure */
4431 7 : new_qos->ch = NULL;
4432 7 : new_qos->thread = NULL;
4433 7 : new_qos->poller = NULL;
4434 : /*
4435 : * The limit member of spdk_bdev_qos_limit structure is not zeroed.
4436 : * It will be used later for the new QoS structure.
4437 : */
4438 35 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
4439 28 : new_qos->rate_limits[i].remaining_this_timeslice = 0;
4440 28 : new_qos->rate_limits[i].min_per_timeslice = 0;
4441 28 : new_qos->rate_limits[i].max_per_timeslice = 0;
4442 28 : }
4443 :
4444 7 : bdev->internal.qos = new_qos;
4445 :
4446 7 : if (old_qos->thread == NULL) {
4447 0 : free(old_qos);
4448 0 : } else {
4449 7 : spdk_thread_send_msg(old_qos->thread, bdev_qos_channel_destroy, old_qos);
4450 : }
4451 :
4452 : /* It is safe to continue with destroying the bdev even though the QoS channel hasn't
4453 : * been destroyed yet. The destruction path will end up waiting for the final
4454 : * channel to be put before it releases resources. */
4455 :
4456 7 : return 0;
4457 7 : }
4458 :
4459 : void
4460 78 : spdk_bdev_add_io_stat(struct spdk_bdev_io_stat *total, struct spdk_bdev_io_stat *add)
4461 : {
4462 78 : total->bytes_read += add->bytes_read;
4463 78 : total->num_read_ops += add->num_read_ops;
4464 78 : total->bytes_written += add->bytes_written;
4465 78 : total->num_write_ops += add->num_write_ops;
4466 78 : total->bytes_unmapped += add->bytes_unmapped;
4467 78 : total->num_unmap_ops += add->num_unmap_ops;
4468 78 : total->bytes_copied += add->bytes_copied;
4469 78 : total->num_copy_ops += add->num_copy_ops;
4470 78 : total->read_latency_ticks += add->read_latency_ticks;
4471 78 : total->write_latency_ticks += add->write_latency_ticks;
4472 78 : total->unmap_latency_ticks += add->unmap_latency_ticks;
4473 78 : total->copy_latency_ticks += add->copy_latency_ticks;
4474 78 : if (total->max_read_latency_ticks < add->max_read_latency_ticks) {
4475 7 : total->max_read_latency_ticks = add->max_read_latency_ticks;
4476 7 : }
4477 78 : if (total->min_read_latency_ticks > add->min_read_latency_ticks) {
4478 39 : total->min_read_latency_ticks = add->min_read_latency_ticks;
4479 39 : }
4480 78 : if (total->max_write_latency_ticks < add->max_write_latency_ticks) {
4481 4 : total->max_write_latency_ticks = add->max_write_latency_ticks;
4482 4 : }
4483 78 : if (total->min_write_latency_ticks > add->min_write_latency_ticks) {
4484 24 : total->min_write_latency_ticks = add->min_write_latency_ticks;
4485 24 : }
4486 78 : if (total->max_unmap_latency_ticks < add->max_unmap_latency_ticks) {
4487 0 : total->max_unmap_latency_ticks = add->max_unmap_latency_ticks;
4488 0 : }
4489 78 : if (total->min_unmap_latency_ticks > add->min_unmap_latency_ticks) {
4490 3 : total->min_unmap_latency_ticks = add->min_unmap_latency_ticks;
4491 3 : }
4492 78 : if (total->max_copy_latency_ticks < add->max_copy_latency_ticks) {
4493 0 : total->max_copy_latency_ticks = add->max_copy_latency_ticks;
4494 0 : }
4495 78 : if (total->min_copy_latency_ticks > add->min_copy_latency_ticks) {
4496 4 : total->min_copy_latency_ticks = add->min_copy_latency_ticks;
4497 4 : }
4498 78 : }
4499 :
4500 : static void
4501 5 : bdev_get_io_stat(struct spdk_bdev_io_stat *to_stat, struct spdk_bdev_io_stat *from_stat)
4502 : {
4503 5 : memcpy(to_stat, from_stat, offsetof(struct spdk_bdev_io_stat, io_error));
4504 :
4505 5 : if (to_stat->io_error != NULL && from_stat->io_error != NULL) {
4506 0 : memcpy(to_stat->io_error, from_stat->io_error,
4507 : sizeof(struct spdk_bdev_io_error_stat));
4508 0 : }
4509 5 : }
4510 :
4511 : void
4512 210 : spdk_bdev_reset_io_stat(struct spdk_bdev_io_stat *stat, enum spdk_bdev_reset_stat_mode mode)
4513 : {
4514 210 : if (mode == SPDK_BDEV_RESET_STAT_NONE) {
4515 5 : return;
4516 : }
4517 :
4518 205 : stat->max_read_latency_ticks = 0;
4519 205 : stat->min_read_latency_ticks = UINT64_MAX;
4520 205 : stat->max_write_latency_ticks = 0;
4521 205 : stat->min_write_latency_ticks = UINT64_MAX;
4522 205 : stat->max_unmap_latency_ticks = 0;
4523 205 : stat->min_unmap_latency_ticks = UINT64_MAX;
4524 205 : stat->max_copy_latency_ticks = 0;
4525 205 : stat->min_copy_latency_ticks = UINT64_MAX;
4526 :
4527 205 : if (mode != SPDK_BDEV_RESET_STAT_ALL) {
4528 2 : return;
4529 : }
4530 :
4531 203 : stat->bytes_read = 0;
4532 203 : stat->num_read_ops = 0;
4533 203 : stat->bytes_written = 0;
4534 203 : stat->num_write_ops = 0;
4535 203 : stat->bytes_unmapped = 0;
4536 203 : stat->num_unmap_ops = 0;
4537 203 : stat->bytes_copied = 0;
4538 203 : stat->num_copy_ops = 0;
4539 203 : stat->read_latency_ticks = 0;
4540 203 : stat->write_latency_ticks = 0;
4541 203 : stat->unmap_latency_ticks = 0;
4542 203 : stat->copy_latency_ticks = 0;
4543 :
4544 203 : if (stat->io_error != NULL) {
4545 128 : memset(stat->io_error, 0, sizeof(struct spdk_bdev_io_error_stat));
4546 128 : }
4547 210 : }
4548 :
4549 : struct spdk_bdev_io_stat *
4550 201 : bdev_alloc_io_stat(bool io_error_stat)
4551 : {
4552 : struct spdk_bdev_io_stat *stat;
4553 :
4554 201 : stat = malloc(sizeof(struct spdk_bdev_io_stat));
4555 201 : if (stat == NULL) {
4556 0 : return NULL;
4557 : }
4558 :
4559 201 : if (io_error_stat) {
4560 127 : stat->io_error = malloc(sizeof(struct spdk_bdev_io_error_stat));
4561 127 : if (stat->io_error == NULL) {
4562 0 : free(stat);
4563 0 : return NULL;
4564 : }
4565 127 : } else {
4566 74 : stat->io_error = NULL;
4567 : }
4568 :
4569 201 : spdk_bdev_reset_io_stat(stat, SPDK_BDEV_RESET_STAT_ALL);
4570 :
4571 201 : return stat;
4572 201 : }
4573 :
4574 : void
4575 201 : bdev_free_io_stat(struct spdk_bdev_io_stat *stat)
4576 : {
4577 201 : if (stat != NULL) {
4578 201 : free(stat->io_error);
4579 201 : free(stat);
4580 201 : }
4581 201 : }
4582 :
4583 : void
4584 0 : spdk_bdev_dump_io_stat_json(struct spdk_bdev_io_stat *stat, struct spdk_json_write_ctx *w)
4585 : {
4586 : int i;
4587 :
4588 0 : spdk_json_write_named_uint64(w, "bytes_read", stat->bytes_read);
4589 0 : spdk_json_write_named_uint64(w, "num_read_ops", stat->num_read_ops);
4590 0 : spdk_json_write_named_uint64(w, "bytes_written", stat->bytes_written);
4591 0 : spdk_json_write_named_uint64(w, "num_write_ops", stat->num_write_ops);
4592 0 : spdk_json_write_named_uint64(w, "bytes_unmapped", stat->bytes_unmapped);
4593 0 : spdk_json_write_named_uint64(w, "num_unmap_ops", stat->num_unmap_ops);
4594 0 : spdk_json_write_named_uint64(w, "bytes_copied", stat->bytes_copied);
4595 0 : spdk_json_write_named_uint64(w, "num_copy_ops", stat->num_copy_ops);
4596 0 : spdk_json_write_named_uint64(w, "read_latency_ticks", stat->read_latency_ticks);
4597 0 : spdk_json_write_named_uint64(w, "max_read_latency_ticks", stat->max_read_latency_ticks);
4598 0 : spdk_json_write_named_uint64(w, "min_read_latency_ticks",
4599 0 : stat->min_read_latency_ticks != UINT64_MAX ?
4600 0 : stat->min_read_latency_ticks : 0);
4601 0 : spdk_json_write_named_uint64(w, "write_latency_ticks", stat->write_latency_ticks);
4602 0 : spdk_json_write_named_uint64(w, "max_write_latency_ticks", stat->max_write_latency_ticks);
4603 0 : spdk_json_write_named_uint64(w, "min_write_latency_ticks",
4604 0 : stat->min_write_latency_ticks != UINT64_MAX ?
4605 0 : stat->min_write_latency_ticks : 0);
4606 0 : spdk_json_write_named_uint64(w, "unmap_latency_ticks", stat->unmap_latency_ticks);
4607 0 : spdk_json_write_named_uint64(w, "max_unmap_latency_ticks", stat->max_unmap_latency_ticks);
4608 0 : spdk_json_write_named_uint64(w, "min_unmap_latency_ticks",
4609 0 : stat->min_unmap_latency_ticks != UINT64_MAX ?
4610 0 : stat->min_unmap_latency_ticks : 0);
4611 0 : spdk_json_write_named_uint64(w, "copy_latency_ticks", stat->copy_latency_ticks);
4612 0 : spdk_json_write_named_uint64(w, "max_copy_latency_ticks", stat->max_copy_latency_ticks);
4613 0 : spdk_json_write_named_uint64(w, "min_copy_latency_ticks",
4614 0 : stat->min_copy_latency_ticks != UINT64_MAX ?
4615 0 : stat->min_copy_latency_ticks : 0);
4616 :
4617 0 : if (stat->io_error != NULL) {
4618 0 : spdk_json_write_named_object_begin(w, "io_error");
4619 0 : for (i = 0; i < -SPDK_MIN_BDEV_IO_STATUS; i++) {
4620 0 : if (stat->io_error->error_status[i] != 0) {
4621 0 : spdk_json_write_named_uint32(w, bdev_io_status_get_string(-(i + 1)),
4622 0 : stat->io_error->error_status[i]);
4623 0 : }
4624 0 : }
4625 0 : spdk_json_write_object_end(w);
4626 0 : }
4627 0 : }
4628 :
4629 : static void
4630 78 : bdev_channel_abort_queued_ios(struct spdk_bdev_channel *ch)
4631 : {
4632 78 : struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;
4633 78 : struct spdk_bdev_mgmt_channel *mgmt_ch = shared_resource->mgmt_ch;
4634 :
4635 78 : bdev_abort_all_queued_io(&shared_resource->nomem_io, ch);
4636 78 : bdev_abort_all_buf_io(mgmt_ch, ch);
4637 78 : }
4638 :
4639 : static void
4640 74 : bdev_channel_destroy(void *io_device, void *ctx_buf)
4641 : {
4642 74 : struct spdk_bdev_channel *ch = ctx_buf;
4643 :
4644 74 : SPDK_DEBUGLOG(bdev, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name,
4645 : spdk_get_thread());
4646 :
4647 74 : spdk_trace_record(TRACE_BDEV_IOCH_DESTROY, ch->bdev->internal.trace_id, 0, 0,
4648 : spdk_thread_get_id(spdk_io_channel_get_thread(ch->channel)));
4649 :
4650 : /* This channel is going away, so add its statistics into the bdev so that they don't get lost. */
4651 74 : spdk_spin_lock(&ch->bdev->internal.spinlock);
4652 74 : spdk_bdev_add_io_stat(ch->bdev->internal.stat, ch->stat);
4653 74 : spdk_spin_unlock(&ch->bdev->internal.spinlock);
4654 :
4655 74 : bdev_abort_all_queued_io(&ch->queued_resets, ch);
4656 :
4657 74 : bdev_channel_abort_queued_ios(ch);
4658 :
4659 74 : if (ch->histogram) {
4660 0 : spdk_histogram_data_free(ch->histogram);
4661 0 : }
4662 :
4663 74 : bdev_channel_destroy_resource(ch);
4664 74 : }
4665 :
4666 : /*
4667 : * If the name already exists in the global bdev name tree, RB_INSERT() returns a pointer
4668 : * to it. Hence we do not have to call bdev_get_by_name() when using this function.
4669 : */
4670 : static int
4671 257 : bdev_name_add(struct spdk_bdev_name *bdev_name, struct spdk_bdev *bdev, const char *name)
4672 : {
4673 : struct spdk_bdev_name *tmp;
4674 :
4675 257 : bdev_name->name = strdup(name);
4676 257 : if (bdev_name->name == NULL) {
4677 0 : SPDK_ERRLOG("Unable to allocate bdev name\n");
4678 0 : return -ENOMEM;
4679 : }
4680 :
4681 257 : bdev_name->bdev = bdev;
4682 :
4683 257 : spdk_spin_lock(&g_bdev_mgr.spinlock);
4684 257 : tmp = RB_INSERT(bdev_name_tree, &g_bdev_mgr.bdev_names, bdev_name);
4685 257 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
4686 :
4687 257 : if (tmp != NULL) {
4688 4 : SPDK_ERRLOG("Bdev name %s already exists\n", name);
4689 4 : free(bdev_name->name);
4690 4 : return -EEXIST;
4691 : }
4692 :
4693 253 : return 0;
4694 257 : }
4695 :
4696 : static void
4697 253 : bdev_name_del_unsafe(struct spdk_bdev_name *bdev_name)
4698 : {
4699 253 : RB_REMOVE(bdev_name_tree, &g_bdev_mgr.bdev_names, bdev_name);
4700 253 : free(bdev_name->name);
4701 253 : }
4702 :
4703 : static void
4704 5 : bdev_name_del(struct spdk_bdev_name *bdev_name)
4705 : {
4706 5 : spdk_spin_lock(&g_bdev_mgr.spinlock);
4707 5 : bdev_name_del_unsafe(bdev_name);
4708 5 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
4709 5 : }
4710 :
4711 : int
4712 133 : spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias)
4713 : {
4714 : struct spdk_bdev_alias *tmp;
4715 : int ret;
4716 :
4717 133 : if (alias == NULL) {
4718 1 : SPDK_ERRLOG("Empty alias passed\n");
4719 1 : return -EINVAL;
4720 : }
4721 :
4722 132 : tmp = calloc(1, sizeof(*tmp));
4723 132 : if (tmp == NULL) {
4724 0 : SPDK_ERRLOG("Unable to allocate alias\n");
4725 0 : return -ENOMEM;
4726 : }
4727 :
4728 132 : ret = bdev_name_add(&tmp->alias, bdev, alias);
4729 132 : if (ret != 0) {
4730 4 : free(tmp);
4731 4 : return ret;
4732 : }
4733 :
4734 128 : TAILQ_INSERT_TAIL(&bdev->aliases, tmp, tailq);
4735 :
4736 128 : return 0;
4737 133 : }
4738 :
4739 : static int
4740 129 : bdev_alias_del(struct spdk_bdev *bdev, const char *alias,
4741 : void (*alias_del_fn)(struct spdk_bdev_name *n))
4742 : {
4743 : struct spdk_bdev_alias *tmp;
4744 :
4745 134 : TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
4746 130 : if (strcmp(alias, tmp->alias.name) == 0) {
4747 125 : TAILQ_REMOVE(&bdev->aliases, tmp, tailq);
4748 125 : alias_del_fn(&tmp->alias);
4749 125 : free(tmp);
4750 125 : return 0;
4751 : }
4752 5 : }
4753 :
4754 4 : return -ENOENT;
4755 129 : }
4756 :
4757 : int
4758 4 : spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias)
4759 : {
4760 : int rc;
4761 :
4762 4 : rc = bdev_alias_del(bdev, alias, bdev_name_del);
4763 4 : if (rc == -ENOENT) {
4764 2 : SPDK_INFOLOG(bdev, "Alias %s does not exist\n", alias);
4765 2 : }
4766 :
4767 4 : return rc;
4768 : }
4769 :
4770 : void
4771 2 : spdk_bdev_alias_del_all(struct spdk_bdev *bdev)
4772 : {
4773 : struct spdk_bdev_alias *p, *tmp;
4774 :
4775 5 : TAILQ_FOREACH_SAFE(p, &bdev->aliases, tailq, tmp) {
4776 3 : TAILQ_REMOVE(&bdev->aliases, p, tailq);
4777 3 : bdev_name_del(&p->alias);
4778 3 : free(p);
4779 3 : }
4780 2 : }
4781 :
4782 : struct spdk_io_channel *
4783 76 : spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
4784 : {
4785 76 : return spdk_get_io_channel(__bdev_to_io_dev(spdk_bdev_desc_get_bdev(desc)));
4786 : }
4787 :
4788 : void *
4789 0 : spdk_bdev_get_module_ctx(struct spdk_bdev_desc *desc)
4790 : {
4791 0 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
4792 0 : void *ctx = NULL;
4793 :
4794 0 : if (bdev->fn_table->get_module_ctx) {
4795 0 : ctx = bdev->fn_table->get_module_ctx(bdev->ctxt);
4796 0 : }
4797 :
4798 0 : return ctx;
4799 : }
4800 :
4801 : const char *
4802 0 : spdk_bdev_get_module_name(const struct spdk_bdev *bdev)
4803 : {
4804 0 : return bdev->module->name;
4805 : }
4806 :
4807 : const char *
4808 253 : spdk_bdev_get_name(const struct spdk_bdev *bdev)
4809 : {
4810 253 : return bdev->name;
4811 : }
4812 :
4813 : const char *
4814 0 : spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
4815 : {
4816 0 : return bdev->product_name;
4817 : }
4818 :
4819 : const struct spdk_bdev_aliases_list *
4820 0 : spdk_bdev_get_aliases(const struct spdk_bdev *bdev)
4821 : {
4822 0 : return &bdev->aliases;
4823 : }
4824 :
4825 : uint32_t
4826 5 : spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
4827 : {
4828 5 : return bdev->blocklen;
4829 : }
4830 :
4831 : uint32_t
4832 0 : spdk_bdev_get_write_unit_size(const struct spdk_bdev *bdev)
4833 : {
4834 0 : return bdev->write_unit_size;
4835 : }
4836 :
4837 : uint64_t
4838 0 : spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
4839 : {
4840 0 : return bdev->blockcnt;
4841 : }
4842 :
4843 : const char *
4844 0 : spdk_bdev_get_qos_rpc_type(enum spdk_bdev_qos_rate_limit_type type)
4845 : {
4846 0 : return qos_rpc_type[type];
4847 : }
4848 :
4849 : void
4850 0 : spdk_bdev_get_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits)
4851 : {
4852 : int i;
4853 :
4854 0 : memset(limits, 0, sizeof(*limits) * SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES);
4855 :
4856 0 : spdk_spin_lock(&bdev->internal.spinlock);
4857 0 : if (bdev->internal.qos) {
4858 0 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
4859 0 : if (bdev->internal.qos->rate_limits[i].limit !=
4860 : SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
4861 0 : limits[i] = bdev->internal.qos->rate_limits[i].limit;
4862 0 : if (bdev_qos_is_iops_rate_limit(i) == false) {
 4863 : /* Convert from bytes to megabytes, which is the user-visible unit. */
4864 0 : limits[i] = limits[i] / 1024 / 1024;
4865 0 : }
4866 0 : }
4867 0 : }
4868 0 : }
4869 0 : spdk_spin_unlock(&bdev->internal.spinlock);
4870 0 : }
4871 :
4872 : size_t
4873 317 : spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
4874 : {
4875 317 : return 1 << bdev->required_alignment;
4876 : }
4877 :
4878 : uint32_t
4879 0 : spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev)
4880 : {
4881 0 : return bdev->optimal_io_boundary;
4882 : }
4883 :
4884 : bool
4885 0 : spdk_bdev_has_write_cache(const struct spdk_bdev *bdev)
4886 : {
4887 0 : return bdev->write_cache;
4888 : }
4889 :
4890 : const struct spdk_uuid *
4891 0 : spdk_bdev_get_uuid(const struct spdk_bdev *bdev)
4892 : {
4893 0 : return &bdev->uuid;
4894 : }
4895 :
4896 : uint16_t
4897 0 : spdk_bdev_get_acwu(const struct spdk_bdev *bdev)
4898 : {
4899 0 : return bdev->acwu;
4900 : }
4901 :
4902 : uint32_t
4903 29 : spdk_bdev_get_md_size(const struct spdk_bdev *bdev)
4904 : {
4905 29 : return bdev->md_len;
4906 : }
4907 :
4908 : bool
4909 130 : spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev)
4910 : {
4911 130 : return (bdev->md_len != 0) && bdev->md_interleave;
4912 : }
4913 :
4914 : bool
4915 159 : spdk_bdev_is_md_separate(const struct spdk_bdev *bdev)
4916 : {
4917 159 : return (bdev->md_len != 0) && !bdev->md_interleave;
4918 : }
4919 :
4920 : bool
4921 0 : spdk_bdev_is_zoned(const struct spdk_bdev *bdev)
4922 : {
4923 0 : return bdev->zoned;
4924 : }
4925 :
4926 : uint32_t
4927 121 : spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev)
4928 : {
4929 121 : if (spdk_bdev_is_md_interleaved(bdev)) {
4930 0 : return bdev->blocklen - bdev->md_len;
4931 : } else {
4932 121 : return bdev->blocklen;
4933 : }
4934 121 : }
4935 :
4936 : uint32_t
4937 0 : spdk_bdev_get_physical_block_size(const struct spdk_bdev *bdev)
4938 : {
4939 0 : return bdev->phys_blocklen;
4940 : }
4941 :
4942 : static uint32_t
4943 9 : _bdev_get_block_size_with_md(const struct spdk_bdev *bdev)
4944 : {
4945 9 : if (!spdk_bdev_is_md_interleaved(bdev)) {
4946 6 : return bdev->blocklen + bdev->md_len;
4947 : } else {
4948 3 : return bdev->blocklen;
4949 : }
4950 9 : }
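A brief worked example of the helper above (illustrative numbers only, not taken from this file): with separate metadata the "block with metadata" size is blocklen + md_len, while with interleaved metadata the metadata already lives inside blocklen.

/* Hypothetical formats, for illustration only:
 *   separate md:    512-byte blocks + 8-byte md       -> 512 + 8 = 520 bytes
 *   interleaved md: 4096-byte blocks (md inside)      -> 4096 bytes
 */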
4951 :
4952 : /* We have to use the typedef in the function declaration to appease astyle. */
4953 : typedef enum spdk_dif_type spdk_dif_type_t;
4954 : typedef enum spdk_dif_pi_format spdk_dif_pi_format_t;
4955 :
4956 : spdk_dif_type_t
4957 0 : spdk_bdev_get_dif_type(const struct spdk_bdev *bdev)
4958 : {
4959 0 : if (bdev->md_len != 0) {
4960 0 : return bdev->dif_type;
4961 : } else {
4962 0 : return SPDK_DIF_DISABLE;
4963 : }
4964 0 : }
4965 :
4966 : spdk_dif_pi_format_t
4967 0 : spdk_bdev_get_dif_pi_format(const struct spdk_bdev *bdev)
4968 : {
4969 0 : return bdev->dif_pi_format;
4970 : }
4971 :
4972 : bool
4973 0 : spdk_bdev_is_dif_head_of_md(const struct spdk_bdev *bdev)
4974 : {
4975 0 : if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
4976 0 : return bdev->dif_is_head_of_md;
4977 : } else {
4978 0 : return false;
4979 : }
4980 0 : }
4981 :
4982 : bool
4983 0 : spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev,
4984 : enum spdk_dif_check_type check_type)
4985 : {
4986 0 : if (spdk_bdev_get_dif_type(bdev) == SPDK_DIF_DISABLE) {
4987 0 : return false;
4988 : }
4989 :
4990 0 : switch (check_type) {
4991 : case SPDK_DIF_CHECK_TYPE_REFTAG:
4992 0 : return (bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK) != 0;
4993 : case SPDK_DIF_CHECK_TYPE_APPTAG:
4994 0 : return (bdev->dif_check_flags & SPDK_DIF_FLAGS_APPTAG_CHECK) != 0;
4995 : case SPDK_DIF_CHECK_TYPE_GUARD:
4996 0 : return (bdev->dif_check_flags & SPDK_DIF_FLAGS_GUARD_CHECK) != 0;
4997 : default:
4998 0 : return false;
4999 : }
5000 0 : }
5001 :
5002 : static uint32_t
5003 3 : bdev_get_max_write(const struct spdk_bdev *bdev, uint64_t num_bytes)
5004 : {
5005 : uint64_t aligned_length, max_write_blocks;
5006 :
5007 3 : aligned_length = num_bytes - (spdk_bdev_get_buf_align(bdev) - 1);
5008 3 : max_write_blocks = aligned_length / _bdev_get_block_size_with_md(bdev);
5009 3 : max_write_blocks -= max_write_blocks % bdev->write_unit_size;
5010 :
5011 3 : return max_write_blocks;
5012 : }
5013 :
5014 : uint32_t
5015 1 : spdk_bdev_get_max_copy(const struct spdk_bdev *bdev)
5016 : {
5017 1 : return bdev->max_copy;
5018 : }
5019 :
5020 : uint64_t
5021 0 : spdk_bdev_get_qd(const struct spdk_bdev *bdev)
5022 : {
5023 0 : return bdev->internal.measured_queue_depth;
5024 : }
5025 :
5026 : uint64_t
5027 0 : spdk_bdev_get_qd_sampling_period(const struct spdk_bdev *bdev)
5028 : {
5029 0 : return bdev->internal.period;
5030 : }
5031 :
5032 : uint64_t
5033 0 : spdk_bdev_get_weighted_io_time(const struct spdk_bdev *bdev)
5034 : {
5035 0 : return bdev->internal.weighted_io_time;
5036 : }
5037 :
5038 : uint64_t
5039 0 : spdk_bdev_get_io_time(const struct spdk_bdev *bdev)
5040 : {
5041 0 : return bdev->internal.io_time;
5042 : }
5043 :
5044 0 : union spdk_bdev_nvme_ctratt spdk_bdev_get_nvme_ctratt(struct spdk_bdev *bdev)
5045 : {
5046 0 : return bdev->ctratt;
5047 : }
5048 :
5049 : uint32_t
5050 0 : spdk_bdev_get_nvme_nsid(struct spdk_bdev *bdev)
5051 : {
5052 0 : return bdev->nsid;
5053 : }
5054 :
5055 : uint32_t
5056 0 : spdk_bdev_desc_get_block_size(struct spdk_bdev_desc *desc)
5057 : {
5058 0 : struct spdk_bdev *bdev = desc->bdev;
5059 :
5060 0 : return desc->opts.hide_metadata ? bdev->blocklen - bdev->md_len : bdev->blocklen;
5061 : }
5062 :
5063 : uint32_t
5064 0 : spdk_bdev_desc_get_md_size(struct spdk_bdev_desc *desc)
5065 : {
5066 0 : struct spdk_bdev *bdev = desc->bdev;
5067 :
5068 0 : return desc->opts.hide_metadata ? 0 : bdev->md_len;
5069 : }
5070 :
5071 : bool
5072 0 : spdk_bdev_desc_is_md_interleaved(struct spdk_bdev_desc *desc)
5073 : {
5074 0 : struct spdk_bdev *bdev = desc->bdev;
5075 :
5076 0 : return desc->opts.hide_metadata ? false : spdk_bdev_is_md_interleaved(bdev);
5077 : }
5078 :
5079 : bool
5080 0 : spdk_bdev_desc_is_md_separate(struct spdk_bdev_desc *desc)
5081 : {
5082 0 : struct spdk_bdev *bdev = desc->bdev;
5083 :
5084 0 : return desc->opts.hide_metadata ? false : spdk_bdev_is_md_separate(bdev);
5085 : }
5086 :
5087 : spdk_dif_type_t
5088 0 : spdk_bdev_desc_get_dif_type(struct spdk_bdev_desc *desc)
5089 : {
5090 0 : struct spdk_bdev *bdev = desc->bdev;
5091 :
5092 0 : return desc->opts.hide_metadata ? SPDK_DIF_DISABLE : spdk_bdev_get_dif_type(bdev);
5093 : }
5094 :
5095 : spdk_dif_pi_format_t
5096 0 : spdk_bdev_desc_get_dif_pi_format(struct spdk_bdev_desc *desc)
5097 : {
5098 0 : struct spdk_bdev *bdev = desc->bdev;
5099 :
5100 0 : return desc->opts.hide_metadata ? SPDK_DIF_PI_FORMAT_16 : spdk_bdev_get_dif_pi_format(bdev);
5101 : }
5102 :
5103 : bool
5104 0 : spdk_bdev_desc_is_dif_head_of_md(struct spdk_bdev_desc *desc)
5105 : {
5106 0 : struct spdk_bdev *bdev = desc->bdev;
5107 :
5108 0 : return desc->opts.hide_metadata ? false : spdk_bdev_is_dif_head_of_md(bdev);
5109 : }
5110 :
5111 : bool
5112 0 : spdk_bdev_desc_is_dif_check_enabled(struct spdk_bdev_desc *desc,
5113 : enum spdk_dif_check_type check_type)
5114 : {
5115 0 : struct spdk_bdev *bdev = desc->bdev;
5116 :
5117 0 : return desc->opts.hide_metadata ? false : spdk_bdev_is_dif_check_enabled(bdev, check_type);
5118 : }
5119 :
5120 : static void bdev_update_qd_sampling_period(void *ctx);
5121 :
5122 : static void
5123 1 : _calculate_measured_qd_cpl(struct spdk_bdev *bdev, void *_ctx, int status)
5124 : {
5125 1 : bdev->internal.measured_queue_depth = bdev->internal.temporary_queue_depth;
5126 :
5127 1 : if (bdev->internal.measured_queue_depth) {
5128 0 : bdev->internal.io_time += bdev->internal.period;
5129 0 : bdev->internal.weighted_io_time += bdev->internal.period * bdev->internal.measured_queue_depth;
5130 0 : }
5131 :
5132 1 : bdev->internal.qd_poll_in_progress = false;
5133 :
5134 1 : bdev_update_qd_sampling_period(bdev);
5135 1 : }
5136 :
5137 : static void
5138 1 : _calculate_measured_qd(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
5139 : struct spdk_io_channel *io_ch, void *_ctx)
5140 : {
5141 1 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(io_ch);
5142 :
5143 1 : bdev->internal.temporary_queue_depth += ch->io_outstanding;
5144 1 : spdk_bdev_for_each_channel_continue(i, 0);
5145 1 : }
5146 :
5147 : static int
5148 1 : bdev_calculate_measured_queue_depth(void *ctx)
5149 : {
5150 1 : struct spdk_bdev *bdev = ctx;
5151 :
5152 1 : bdev->internal.qd_poll_in_progress = true;
5153 1 : bdev->internal.temporary_queue_depth = 0;
5154 1 : spdk_bdev_for_each_channel(bdev, _calculate_measured_qd, bdev, _calculate_measured_qd_cpl);
5155 1 : return SPDK_POLLER_BUSY;
5156 : }
5157 :
5158 : static void
5159 5 : bdev_update_qd_sampling_period(void *ctx)
5160 : {
5161 5 : struct spdk_bdev *bdev = ctx;
5162 :
5163 5 : if (bdev->internal.period == bdev->internal.new_period) {
5164 0 : return;
5165 : }
5166 :
5167 5 : if (bdev->internal.qd_poll_in_progress) {
5168 1 : return;
5169 : }
5170 :
5171 4 : bdev->internal.period = bdev->internal.new_period;
5172 :
5173 4 : spdk_poller_unregister(&bdev->internal.qd_poller);
5174 4 : if (bdev->internal.period != 0) {
5175 2 : bdev->internal.qd_poller = SPDK_POLLER_REGISTER(bdev_calculate_measured_queue_depth,
5176 : bdev, bdev->internal.period);
5177 2 : } else {
5178 2 : spdk_bdev_close(bdev->internal.qd_desc);
5179 2 : bdev->internal.qd_desc = NULL;
5180 : }
5181 5 : }
5182 :
5183 : static void
5184 0 : _tmp_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
5185 : {
5186 0 : SPDK_NOTICELOG("Unexpected event type: %d\n", type);
5187 0 : }
5188 :
5189 : void
5190 130 : spdk_bdev_set_qd_sampling_period(struct spdk_bdev *bdev, uint64_t period)
5191 : {
5192 : int rc;
5193 :
5194 130 : if (bdev->internal.new_period == period) {
5195 124 : return;
5196 : }
5197 :
5198 6 : bdev->internal.new_period = period;
5199 :
5200 6 : if (bdev->internal.qd_desc != NULL) {
5201 4 : assert(bdev->internal.period != 0);
5202 :
5203 8 : spdk_thread_send_msg(bdev->internal.qd_desc->thread,
5204 4 : bdev_update_qd_sampling_period, bdev);
5205 4 : return;
5206 : }
5207 :
5208 2 : assert(bdev->internal.period == 0);
5209 :
5210 4 : rc = spdk_bdev_open_ext(spdk_bdev_get_name(bdev), false, _tmp_bdev_event_cb,
5211 2 : NULL, &bdev->internal.qd_desc);
5212 2 : if (rc != 0) {
5213 0 : return;
5214 : }
5215 :
5216 2 : bdev->internal.period = period;
5217 2 : bdev->internal.qd_poller = SPDK_POLLER_REGISTER(bdev_calculate_measured_queue_depth,
5218 : bdev, period);
5219 130 : }
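A minimal usage sketch of the sampling API above, assuming spdk/bdev.h is included; my_bdev is a placeholder for an already registered bdev.

/* Start measuring queue depth every 100 ms (the period is in microseconds). */
spdk_bdev_set_qd_sampling_period(my_bdev, 100 * 1000);

/* Later, e.g. from a statistics poller: */
uint64_t qd = spdk_bdev_get_qd(my_bdev);
uint64_t io_time = spdk_bdev_get_io_time(my_bdev);
(void)qd; (void)io_time;

/* A period of 0 stops sampling and closes the internal descriptor. */
spdk_bdev_set_qd_sampling_period(my_bdev, 0);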
5220 :
5221 : struct bdev_get_current_qd_ctx {
5222 : uint64_t current_qd;
5223 : spdk_bdev_get_current_qd_cb cb_fn;
5224 : void *cb_arg;
5225 : };
5226 :
5227 : static void
5228 0 : bdev_get_current_qd_done(struct spdk_bdev *bdev, void *_ctx, int status)
5229 : {
5230 0 : struct bdev_get_current_qd_ctx *ctx = _ctx;
5231 :
5232 0 : ctx->cb_fn(bdev, ctx->current_qd, ctx->cb_arg, 0);
5233 :
5234 0 : free(ctx);
5235 0 : }
5236 :
5237 : static void
5238 0 : bdev_get_current_qd(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
5239 : struct spdk_io_channel *io_ch, void *_ctx)
5240 : {
5241 0 : struct bdev_get_current_qd_ctx *ctx = _ctx;
5242 0 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
5243 :
5244 0 : ctx->current_qd += bdev_ch->io_outstanding;
5245 :
5246 0 : spdk_bdev_for_each_channel_continue(i, 0);
5247 0 : }
5248 :
5249 : void
5250 0 : spdk_bdev_get_current_qd(struct spdk_bdev *bdev, spdk_bdev_get_current_qd_cb cb_fn,
5251 : void *cb_arg)
5252 : {
5253 : struct bdev_get_current_qd_ctx *ctx;
5254 :
5255 0 : assert(cb_fn != NULL);
5256 :
5257 0 : ctx = calloc(1, sizeof(*ctx));
5258 0 : if (ctx == NULL) {
5259 0 : cb_fn(bdev, 0, cb_arg, -ENOMEM);
5260 0 : return;
5261 : }
5262 :
5263 0 : ctx->cb_fn = cb_fn;
5264 0 : ctx->cb_arg = cb_arg;
5265 :
5266 0 : spdk_bdev_for_each_channel(bdev, bdev_get_current_qd, ctx, bdev_get_current_qd_done);
5267 0 : }
5268 :
5269 : static void
5270 25 : _event_notify(struct spdk_bdev_desc *desc, enum spdk_bdev_event_type type)
5271 : {
5272 25 : assert(desc->thread == spdk_get_thread());
5273 :
5274 25 : spdk_spin_lock(&desc->spinlock);
5275 25 : desc->refs--;
5276 25 : if (!desc->closed) {
5277 14 : spdk_spin_unlock(&desc->spinlock);
5278 28 : desc->callback.event_fn(type,
5279 14 : desc->bdev,
5280 14 : desc->callback.ctx);
5281 14 : return;
5282 11 : } else if (desc->refs == 0) {
5283 : /* This descriptor was closed after this event_notify message was sent.
5284 : * spdk_bdev_close() could not free the descriptor since this message was
5285 : * in flight, so we free it now using bdev_desc_free().
5286 : */
5287 10 : spdk_spin_unlock(&desc->spinlock);
5288 10 : bdev_desc_free(desc);
5289 10 : return;
5290 : }
5291 1 : spdk_spin_unlock(&desc->spinlock);
5292 25 : }
5293 :
5294 : static void
5295 25 : event_notify(struct spdk_bdev_desc *desc, spdk_msg_fn event_notify_fn)
5296 : {
5297 25 : spdk_spin_lock(&desc->spinlock);
5298 25 : desc->refs++;
5299 25 : spdk_thread_send_msg(desc->thread, event_notify_fn, desc);
5300 25 : spdk_spin_unlock(&desc->spinlock);
5301 25 : }
5302 :
5303 : static void
5304 6 : _resize_notify(void *ctx)
5305 : {
5306 6 : struct spdk_bdev_desc *desc = ctx;
5307 :
5308 6 : _event_notify(desc, SPDK_BDEV_EVENT_RESIZE);
5309 6 : }
5310 :
5311 : int
5312 11 : spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size)
5313 : {
5314 : struct spdk_bdev_desc *desc;
5315 : int ret;
5316 :
5317 11 : if (size == bdev->blockcnt) {
5318 0 : return 0;
5319 : }
5320 :
5321 11 : spdk_spin_lock(&bdev->internal.spinlock);
5322 :
5323 : /* bdev has open descriptors */
5324 11 : if (!TAILQ_EMPTY(&bdev->internal.open_descs) &&
5325 7 : bdev->blockcnt > size) {
5326 1 : ret = -EBUSY;
5327 1 : } else {
5328 10 : bdev->blockcnt = size;
5329 16 : TAILQ_FOREACH(desc, &bdev->internal.open_descs, link) {
5330 6 : event_notify(desc, _resize_notify);
5331 6 : }
5332 10 : ret = 0;
5333 : }
5334 :
5335 11 : spdk_spin_unlock(&bdev->internal.spinlock);
5336 :
5337 11 : return ret;
5338 11 : }
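A hedged sketch of the consumer side of the notification above: a descriptor opened with an event callback is told, on its own thread, when a module changes the block count. Names here (my_event_cb, my_desc) are placeholders.

static void
my_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	if (type == SPDK_BDEV_EVENT_RESIZE) {
		SPDK_NOTICELOG("bdev %s resized to %" PRIu64 " blocks\n",
			       spdk_bdev_get_name(bdev), spdk_bdev_get_num_blocks(bdev));
	}
}

/* spdk_bdev_open_ext("Name", false, my_event_cb, NULL, &my_desc) registers the
 * callback; a later spdk_bdev_notify_blockcnt_change() from the module triggers it. */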
5339 :
5340 : /*
5341 : * Convert I/O offset and length from bytes to blocks.
5342 : *
5343 : * Returns zero on success or non-zero if the byte parameters aren't divisible by the block size.
5344 : */
5345 : static uint64_t
5346 20 : bdev_bytes_to_blocks(struct spdk_bdev_desc *desc, uint64_t offset_bytes,
5347 : uint64_t *offset_blocks, uint64_t num_bytes, uint64_t *num_blocks)
5348 : {
5349 20 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5350 20 : uint32_t block_size = bdev->blocklen;
5351 : uint8_t shift_cnt;
5352 :
5353 : /* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */
5354 20 : if (spdk_likely(spdk_u32_is_pow2(block_size))) {
5355 17 : shift_cnt = spdk_u32log2(block_size);
5356 17 : *offset_blocks = offset_bytes >> shift_cnt;
5357 17 : *num_blocks = num_bytes >> shift_cnt;
5358 34 : return (offset_bytes - (*offset_blocks << shift_cnt)) |
5359 17 : (num_bytes - (*num_blocks << shift_cnt));
5360 : } else {
5361 3 : *offset_blocks = offset_bytes / block_size;
5362 3 : *num_blocks = num_bytes / block_size;
5363 3 : return (offset_bytes % block_size) | (num_bytes % block_size);
5364 : }
5365 20 : }
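A short worked example of the conversion above, for a hypothetical 512-byte (power-of-two) block size:

/* block_size = 512 -> shift_cnt = 9:
 *   offset_bytes = 4096 -> offset_blocks = 4096 >> 9 = 8, remainder 0
 *   num_bytes    = 1536 -> num_blocks    = 1536 >> 9 = 3, remainder 0
 * The function ORs the two remainders, so any non-zero return means at least
 * one byte parameter was not block aligned (e.g. num_bytes = 1000 leaves
 * 1000 - (1 << 9) = 488 and the caller returns -EINVAL). */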
5366 :
5367 : static bool
5368 689 : bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64_t num_blocks)
5369 : {
 5370 :     /* Return failure if offset_blocks + num_blocks is less than offset_blocks; this indicates
 5371 :      * an overflow, i.e. the offset has wrapped around. */
5372 689 : if (offset_blocks + num_blocks < offset_blocks) {
5373 1 : return false;
5374 : }
5375 :
5376 : /* Return failure if offset_blocks + num_blocks exceeds the size of the bdev */
5377 688 : if (offset_blocks + num_blocks > bdev->blockcnt) {
5378 2 : return false;
5379 : }
5380 :
5381 686 : return true;
5382 689 : }
5383 :
5384 : static void
5385 2 : bdev_seek_complete_cb(void *ctx)
5386 : {
5387 2 : struct spdk_bdev_io *bdev_io = ctx;
5388 :
5389 2 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
5390 2 : bdev_io->internal.cb(bdev_io, true, bdev_io->internal.caller_ctx);
5391 2 : }
5392 :
5393 : static int
5394 4 : bdev_seek(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5395 : uint64_t offset_blocks, enum spdk_bdev_io_type io_type,
5396 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5397 : {
5398 4 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5399 : struct spdk_bdev_io *bdev_io;
5400 4 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5401 :
5402 4 : assert(io_type == SPDK_BDEV_IO_TYPE_SEEK_DATA || io_type == SPDK_BDEV_IO_TYPE_SEEK_HOLE);
5403 :
5404 : /* Check if offset_blocks is valid looking at the validity of one block */
5405 4 : if (!bdev_io_valid_blocks(bdev, offset_blocks, 1)) {
5406 0 : return -EINVAL;
5407 : }
5408 :
5409 4 : bdev_io = bdev_channel_get_io(channel);
5410 4 : if (!bdev_io) {
5411 0 : return -ENOMEM;
5412 : }
5413 :
5414 4 : bdev_io->internal.ch = channel;
5415 4 : bdev_io->internal.desc = desc;
5416 4 : bdev_io->type = io_type;
5417 4 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5418 4 : bdev_io->u.bdev.memory_domain = NULL;
5419 4 : bdev_io->u.bdev.memory_domain_ctx = NULL;
5420 4 : bdev_io->u.bdev.accel_sequence = NULL;
5421 4 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5422 :
5423 4 : if (!spdk_bdev_io_type_supported(bdev, io_type)) {
 5424 :         /* If the bdev doesn't support seeking to the next data/hole offset,
 5425 :          * it is assumed that only data and no holes are present. */
5426 2 : if (io_type == SPDK_BDEV_IO_TYPE_SEEK_DATA) {
5427 1 : bdev_io->u.bdev.seek.offset = offset_blocks;
5428 1 : } else {
5429 1 : bdev_io->u.bdev.seek.offset = UINT64_MAX;
5430 : }
5431 :
5432 2 : spdk_thread_send_msg(spdk_get_thread(), bdev_seek_complete_cb, bdev_io);
5433 2 : return 0;
5434 : }
5435 :
5436 2 : bdev_io_submit(bdev_io);
5437 2 : return 0;
5438 4 : }
5439 :
5440 : int
5441 2 : spdk_bdev_seek_data(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5442 : uint64_t offset_blocks,
5443 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5444 : {
5445 2 : return bdev_seek(desc, ch, offset_blocks, SPDK_BDEV_IO_TYPE_SEEK_DATA, cb, cb_arg);
5446 : }
5447 :
5448 : int
5449 2 : spdk_bdev_seek_hole(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5450 : uint64_t offset_blocks,
5451 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5452 : {
5453 2 : return bdev_seek(desc, ch, offset_blocks, SPDK_BDEV_IO_TYPE_SEEK_HOLE, cb, cb_arg);
5454 : }
5455 :
5456 : uint64_t
5457 4 : spdk_bdev_io_get_seek_offset(const struct spdk_bdev_io *bdev_io)
5458 : {
5459 4 : return bdev_io->u.bdev.seek.offset;
5460 : }
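A hedged usage sketch of the seek API above; seek_done, desc, and io_ch are placeholders and error handling is omitted.

static void
seek_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	if (success) {
		/* UINT64_MAX conventionally means no matching offset was found. */
		uint64_t next = spdk_bdev_io_get_seek_offset(bdev_io);
		SPDK_NOTICELOG("next data starts at block %" PRIu64 "\n", next);
	}
	spdk_bdev_free_io(bdev_io);
}

/* Find the first block containing data at or after block 0. */
rc = spdk_bdev_seek_data(desc, io_ch, 0, seek_done, NULL);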
5461 :
5462 : static int
5463 204 : bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf,
5464 : void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
5465 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5466 : {
5467 204 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5468 : struct spdk_bdev_io *bdev_io;
5469 204 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5470 :
5471 204 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
5472 0 : return -EINVAL;
5473 : }
5474 :
5475 204 : bdev_io = bdev_channel_get_io(channel);
5476 204 : if (!bdev_io) {
5477 1 : return -ENOMEM;
5478 : }
5479 :
5480 203 : bdev_io->internal.ch = channel;
5481 203 : bdev_io->internal.desc = desc;
5482 203 : bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
5483 203 : bdev_io->u.bdev.iovs = &bdev_io->iov;
5484 203 : bdev_io->u.bdev.iovs[0].iov_base = buf;
5485 203 : bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
5486 203 : bdev_io->u.bdev.iovcnt = 1;
5487 203 : bdev_io->u.bdev.md_buf = md_buf;
5488 203 : bdev_io->u.bdev.num_blocks = num_blocks;
5489 203 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5490 203 : bdev_io->u.bdev.memory_domain = NULL;
5491 203 : bdev_io->u.bdev.memory_domain_ctx = NULL;
5492 203 : bdev_io->u.bdev.accel_sequence = NULL;
5493 203 : bdev_io->u.bdev.dif_check_flags = bdev->dif_check_flags;
5494 203 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5495 :
5496 203 : bdev_io_submit(bdev_io);
5497 203 : return 0;
5498 204 : }
5499 :
5500 : int
5501 3 : spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5502 : void *buf, uint64_t offset, uint64_t nbytes,
5503 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5504 : {
5505 : uint64_t offset_blocks, num_blocks;
5506 :
5507 3 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
5508 0 : return -EINVAL;
5509 : }
5510 :
5511 3 : return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
5512 3 : }
5513 :
5514 : int
5515 200 : spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5516 : void *buf, uint64_t offset_blocks, uint64_t num_blocks,
5517 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5518 : {
5519 200 : return bdev_read_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks, cb, cb_arg);
5520 : }
5521 :
5522 : int
5523 4 : spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5524 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
5525 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5526 : {
5527 8 : struct iovec iov = {
5528 4 : .iov_base = buf,
5529 : };
5530 :
5531 4 : if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
5532 0 : return -EINVAL;
5533 : }
5534 :
5535 4 : if (md_buf && !_is_buf_allocated(&iov)) {
5536 0 : return -EINVAL;
5537 : }
5538 :
5539 8 : return bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
5540 4 : cb, cb_arg);
5541 4 : }
5542 :
5543 : int
5544 5 : spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5545 : struct iovec *iov, int iovcnt,
5546 : uint64_t offset, uint64_t nbytes,
5547 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5548 : {
5549 : uint64_t offset_blocks, num_blocks;
5550 :
5551 5 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
5552 0 : return -EINVAL;
5553 : }
5554 :
5555 5 : return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
5556 5 : }
5557 :
5558 : static int
5559 226 : bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5560 : struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
5561 : uint64_t num_blocks, struct spdk_memory_domain *domain, void *domain_ctx,
5562 : struct spdk_accel_sequence *seq, uint32_t dif_check_flags,
5563 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5564 : {
5565 226 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5566 : struct spdk_bdev_io *bdev_io;
5567 226 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5568 :
5569 226 : if (spdk_unlikely(!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks))) {
5570 0 : return -EINVAL;
5571 : }
5572 :
5573 226 : bdev_io = bdev_channel_get_io(channel);
5574 226 : if (spdk_unlikely(!bdev_io)) {
5575 2 : return -ENOMEM;
5576 : }
5577 :
5578 224 : bdev_io->internal.ch = channel;
5579 224 : bdev_io->internal.desc = desc;
5580 224 : bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
5581 224 : bdev_io->u.bdev.iovs = iov;
5582 224 : bdev_io->u.bdev.iovcnt = iovcnt;
5583 224 : bdev_io->u.bdev.md_buf = md_buf;
5584 224 : bdev_io->u.bdev.num_blocks = num_blocks;
5585 224 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5586 224 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5587 :
5588 224 : if (seq != NULL) {
5589 0 : bdev_io->internal.f.has_accel_sequence = true;
5590 0 : bdev_io->internal.accel_sequence = seq;
5591 0 : }
5592 :
5593 224 : if (domain != NULL) {
5594 2 : bdev_io->internal.f.has_memory_domain = true;
5595 2 : bdev_io->internal.memory_domain = domain;
5596 2 : bdev_io->internal.memory_domain_ctx = domain_ctx;
5597 2 : }
5598 :
5599 224 : bdev_io->u.bdev.memory_domain = domain;
5600 224 : bdev_io->u.bdev.memory_domain_ctx = domain_ctx;
5601 224 : bdev_io->u.bdev.accel_sequence = seq;
5602 224 : bdev_io->u.bdev.dif_check_flags = dif_check_flags;
5603 :
5604 224 : _bdev_io_submit_ext(desc, bdev_io);
5605 :
5606 224 : return 0;
5607 226 : }
5608 :
5609 : int
5610 21 : spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5611 : struct iovec *iov, int iovcnt,
5612 : uint64_t offset_blocks, uint64_t num_blocks,
5613 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5614 : {
5615 21 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5616 :
5617 42 : return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
5618 21 : num_blocks, NULL, NULL, NULL, bdev->dif_check_flags, cb, cb_arg);
5619 : }
5620 :
5621 : int
5622 4 : spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5623 : struct iovec *iov, int iovcnt, void *md_buf,
5624 : uint64_t offset_blocks, uint64_t num_blocks,
5625 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5626 : {
5627 4 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5628 :
5629 4 : if (md_buf && !spdk_bdev_is_md_separate(bdev)) {
5630 0 : return -EINVAL;
5631 : }
5632 :
5633 4 : if (md_buf && !_is_buf_allocated(iov)) {
5634 0 : return -EINVAL;
5635 : }
5636 :
5637 8 : return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
5638 4 : num_blocks, NULL, NULL, NULL, bdev->dif_check_flags, cb, cb_arg);
5639 4 : }
5640 :
5641 : static inline bool
5642 14 : _bdev_io_check_opts(struct spdk_bdev_ext_io_opts *opts, struct iovec *iov)
5643 : {
5644 : /*
 5645 :      * We check that the opts size is at least the size the structure had when
 5646 :      * spdk_bdev_ext_io_opts was first introduced (ac6f2bdd8d), since access to
 5647 :      * those members is not checked internally.
5648 : */
5649 24 : return opts->size >= offsetof(struct spdk_bdev_ext_io_opts, metadata) +
5650 14 : sizeof(opts->metadata) &&
5651 10 : opts->size <= sizeof(*opts) &&
5652 : /* When memory domain is used, the user must provide data buffers */
5653 8 : (!opts->memory_domain || (iov && iov[0].iov_base));
5654 : }
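A hedged sketch of how a caller satisfies the size check above when issuing an extended read; the buffers, callback, and channel names are placeholders.

struct spdk_bdev_ext_io_opts opts = {};

opts.size = sizeof(opts);        /* tells the bdev layer which members exist */
opts.metadata = md_buf;          /* separate metadata buffer, or NULL */
opts.memory_domain = NULL;       /* plain host memory in this sketch */

rc = spdk_bdev_readv_blocks_ext(desc, io_ch, iov, iovcnt,
				offset_blocks, num_blocks,
				read_done_cb, NULL, &opts);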
5655 :
5656 : int
5657 8 : spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5658 : struct iovec *iov, int iovcnt,
5659 : uint64_t offset_blocks, uint64_t num_blocks,
5660 : spdk_bdev_io_completion_cb cb, void *cb_arg,
5661 : struct spdk_bdev_ext_io_opts *opts)
5662 : {
5663 8 : struct spdk_memory_domain *domain = NULL;
5664 8 : struct spdk_accel_sequence *seq = NULL;
5665 8 : void *domain_ctx = NULL, *md = NULL;
5666 8 : uint32_t dif_check_flags = 0;
5667 8 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5668 :
5669 8 : if (opts) {
5670 7 : if (spdk_unlikely(!_bdev_io_check_opts(opts, iov))) {
5671 3 : return -EINVAL;
5672 : }
5673 :
5674 4 : md = opts->metadata;
5675 4 : domain = bdev_get_ext_io_opt(opts, memory_domain, NULL);
5676 4 : domain_ctx = bdev_get_ext_io_opt(opts, memory_domain_ctx, NULL);
5677 4 : seq = bdev_get_ext_io_opt(opts, accel_sequence, NULL);
5678 4 : if (md) {
5679 4 : if (spdk_unlikely(!spdk_bdev_is_md_separate(bdev))) {
5680 0 : return -EINVAL;
5681 : }
5682 :
5683 4 : if (spdk_unlikely(!_is_buf_allocated(iov))) {
5684 0 : return -EINVAL;
5685 : }
5686 :
5687 4 : if (spdk_unlikely(seq != NULL)) {
5688 0 : return -EINVAL;
5689 : }
5690 4 : }
5691 4 : }
5692 :
5693 10 : dif_check_flags = bdev->dif_check_flags &
5694 5 : ~(bdev_get_ext_io_opt(opts, dif_check_flags_exclude_mask, 0));
5695 :
5696 10 : return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks,
5697 5 : num_blocks, domain, domain_ctx, seq, dif_check_flags, cb, cb_arg);
5698 8 : }
5699 :
5700 : static int
5701 36 : bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5702 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
5703 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5704 : {
5705 36 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5706 : struct spdk_bdev_io *bdev_io;
5707 36 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5708 :
5709 36 : if (!desc->write) {
5710 0 : return -EBADF;
5711 : }
5712 :
5713 36 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
5714 0 : return -EINVAL;
5715 : }
5716 :
5717 36 : bdev_io = bdev_channel_get_io(channel);
5718 36 : if (!bdev_io) {
5719 0 : return -ENOMEM;
5720 : }
5721 :
5722 36 : bdev_io->internal.ch = channel;
5723 36 : bdev_io->internal.desc = desc;
5724 36 : bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
5725 36 : bdev_io->u.bdev.iovs = &bdev_io->iov;
5726 36 : bdev_io->u.bdev.iovs[0].iov_base = buf;
5727 36 : bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
5728 36 : bdev_io->u.bdev.iovcnt = 1;
5729 36 : bdev_io->u.bdev.md_buf = md_buf;
5730 36 : bdev_io->u.bdev.num_blocks = num_blocks;
5731 36 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5732 36 : bdev_io->u.bdev.memory_domain = NULL;
5733 36 : bdev_io->u.bdev.memory_domain_ctx = NULL;
5734 36 : bdev_io->u.bdev.accel_sequence = NULL;
5735 36 : bdev_io->u.bdev.dif_check_flags = bdev->dif_check_flags;
5736 36 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5737 :
5738 36 : bdev_io_submit(bdev_io);
5739 36 : return 0;
5740 36 : }
5741 :
5742 : int
5743 3 : spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5744 : void *buf, uint64_t offset, uint64_t nbytes,
5745 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5746 : {
5747 : uint64_t offset_blocks, num_blocks;
5748 :
5749 3 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
5750 0 : return -EINVAL;
5751 : }
5752 :
5753 3 : return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
5754 3 : }
5755 :
5756 : int
5757 27 : spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5758 : void *buf, uint64_t offset_blocks, uint64_t num_blocks,
5759 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5760 : {
5761 54 : return bdev_write_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks,
5762 27 : cb, cb_arg);
5763 : }
5764 :
5765 : int
5766 3 : spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5767 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
5768 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5769 : {
5770 6 : struct iovec iov = {
5771 3 : .iov_base = buf,
5772 : };
5773 :
5774 3 : if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
5775 0 : return -EINVAL;
5776 : }
5777 :
5778 3 : if (md_buf && !_is_buf_allocated(&iov)) {
5779 0 : return -EINVAL;
5780 : }
5781 :
5782 6 : return bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
5783 3 : cb, cb_arg);
5784 3 : }
5785 :
5786 : static int
5787 70 : bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5788 : struct iovec *iov, int iovcnt, void *md_buf,
5789 : uint64_t offset_blocks, uint64_t num_blocks,
5790 : struct spdk_memory_domain *domain, void *domain_ctx,
5791 : struct spdk_accel_sequence *seq, uint32_t dif_check_flags,
5792 : uint32_t nvme_cdw12_raw, uint32_t nvme_cdw13_raw,
5793 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5794 : {
5795 70 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5796 : struct spdk_bdev_io *bdev_io;
5797 70 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5798 :
5799 70 : if (spdk_unlikely(!desc->write)) {
5800 0 : return -EBADF;
5801 : }
5802 :
5803 70 : if (spdk_unlikely(!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks))) {
5804 0 : return -EINVAL;
5805 : }
5806 :
5807 70 : bdev_io = bdev_channel_get_io(channel);
5808 70 : if (spdk_unlikely(!bdev_io)) {
5809 2 : return -ENOMEM;
5810 : }
5811 :
5812 68 : bdev_io->internal.ch = channel;
5813 68 : bdev_io->internal.desc = desc;
5814 68 : bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
5815 68 : bdev_io->u.bdev.iovs = iov;
5816 68 : bdev_io->u.bdev.iovcnt = iovcnt;
5817 68 : bdev_io->u.bdev.md_buf = md_buf;
5818 68 : bdev_io->u.bdev.num_blocks = num_blocks;
5819 68 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5820 68 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5821 68 : if (seq != NULL) {
5822 0 : bdev_io->internal.f.has_accel_sequence = true;
5823 0 : bdev_io->internal.accel_sequence = seq;
5824 0 : }
5825 :
5826 68 : if (domain != NULL) {
5827 2 : bdev_io->internal.f.has_memory_domain = true;
5828 2 : bdev_io->internal.memory_domain = domain;
5829 2 : bdev_io->internal.memory_domain_ctx = domain_ctx;
5830 2 : }
5831 :
5832 68 : bdev_io->u.bdev.memory_domain = domain;
5833 68 : bdev_io->u.bdev.memory_domain_ctx = domain_ctx;
5834 68 : bdev_io->u.bdev.accel_sequence = seq;
5835 68 : bdev_io->u.bdev.dif_check_flags = dif_check_flags;
5836 68 : bdev_io->u.bdev.nvme_cdw12.raw = nvme_cdw12_raw;
5837 68 : bdev_io->u.bdev.nvme_cdw13.raw = nvme_cdw13_raw;
5838 :
5839 68 : _bdev_io_submit_ext(desc, bdev_io);
5840 :
5841 68 : return 0;
5842 70 : }
5843 :
5844 : int
5845 3 : spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5846 : struct iovec *iov, int iovcnt,
5847 : uint64_t offset, uint64_t len,
5848 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5849 : {
5850 : uint64_t offset_blocks, num_blocks;
5851 :
5852 3 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, len, &num_blocks) != 0) {
5853 0 : return -EINVAL;
5854 : }
5855 :
5856 3 : return spdk_bdev_writev_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
5857 3 : }
5858 :
5859 : int
5860 14 : spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5861 : struct iovec *iov, int iovcnt,
5862 : uint64_t offset_blocks, uint64_t num_blocks,
5863 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5864 : {
5865 14 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5866 :
5867 28 : return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
5868 14 : num_blocks, NULL, NULL, NULL, bdev->dif_check_flags, 0, 0,
5869 14 : cb, cb_arg);
5870 : }
5871 :
5872 : int
5873 1 : spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5874 : struct iovec *iov, int iovcnt, void *md_buf,
5875 : uint64_t offset_blocks, uint64_t num_blocks,
5876 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5877 : {
5878 1 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5879 :
5880 1 : if (md_buf && !spdk_bdev_is_md_separate(bdev)) {
5881 0 : return -EINVAL;
5882 : }
5883 :
5884 1 : if (md_buf && !_is_buf_allocated(iov)) {
5885 0 : return -EINVAL;
5886 : }
5887 :
5888 2 : return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
5889 1 : num_blocks, NULL, NULL, NULL, bdev->dif_check_flags, 0, 0,
5890 1 : cb, cb_arg);
5891 1 : }
5892 :
5893 : int
5894 8 : spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5895 : struct iovec *iov, int iovcnt,
5896 : uint64_t offset_blocks, uint64_t num_blocks,
5897 : spdk_bdev_io_completion_cb cb, void *cb_arg,
5898 : struct spdk_bdev_ext_io_opts *opts)
5899 : {
5900 8 : struct spdk_memory_domain *domain = NULL;
5901 8 : struct spdk_accel_sequence *seq = NULL;
5902 8 : void *domain_ctx = NULL, *md = NULL;
5903 8 : uint32_t dif_check_flags = 0;
5904 8 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5905 8 : uint32_t nvme_cdw12_raw = 0;
5906 8 : uint32_t nvme_cdw13_raw = 0;
5907 :
5908 8 : if (opts) {
5909 7 : if (spdk_unlikely(!_bdev_io_check_opts(opts, iov))) {
5910 3 : return -EINVAL;
5911 : }
5912 4 : md = opts->metadata;
5913 4 : domain = bdev_get_ext_io_opt(opts, memory_domain, NULL);
5914 4 : domain_ctx = bdev_get_ext_io_opt(opts, memory_domain_ctx, NULL);
5915 4 : seq = bdev_get_ext_io_opt(opts, accel_sequence, NULL);
5916 4 : nvme_cdw12_raw = bdev_get_ext_io_opt(opts, nvme_cdw12.raw, 0);
5917 4 : nvme_cdw13_raw = bdev_get_ext_io_opt(opts, nvme_cdw13.raw, 0);
5918 4 : if (md) {
5919 4 : if (spdk_unlikely(!spdk_bdev_is_md_separate(bdev))) {
5920 0 : return -EINVAL;
5921 : }
5922 :
5923 4 : if (spdk_unlikely(!_is_buf_allocated(iov))) {
5924 0 : return -EINVAL;
5925 : }
5926 :
5927 4 : if (spdk_unlikely(seq != NULL)) {
5928 0 : return -EINVAL;
5929 : }
5930 4 : }
5931 4 : }
5932 :
5933 10 : dif_check_flags = bdev->dif_check_flags &
5934 5 : ~(bdev_get_ext_io_opt(opts, dif_check_flags_exclude_mask, 0));
5935 :
5936 10 : return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks, num_blocks,
5937 5 : domain, domain_ctx, seq, dif_check_flags,
5938 5 : nvme_cdw12_raw, nvme_cdw13_raw, cb, cb_arg);
5939 8 : }
5940 :
5941 : static void
5942 11 : bdev_compare_do_read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
5943 : {
5944 11 : struct spdk_bdev_io *parent_io = cb_arg;
5945 11 : struct spdk_bdev *bdev = parent_io->bdev;
5946 11 : uint8_t *read_buf = bdev_io->u.bdev.iovs[0].iov_base;
5947 11 : int i, rc = 0;
5948 :
5949 11 : if (!success) {
5950 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
5951 0 : parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
5952 0 : spdk_bdev_free_io(bdev_io);
5953 0 : return;
5954 : }
5955 :
5956 17 : for (i = 0; i < parent_io->u.bdev.iovcnt; i++) {
5957 22 : rc = memcmp(read_buf,
5958 11 : parent_io->u.bdev.iovs[i].iov_base,
5959 11 : parent_io->u.bdev.iovs[i].iov_len);
5960 11 : if (rc) {
5961 5 : break;
5962 : }
5963 6 : read_buf += parent_io->u.bdev.iovs[i].iov_len;
5964 6 : }
5965 :
5966 11 : if (rc == 0 && parent_io->u.bdev.md_buf && spdk_bdev_is_md_separate(bdev)) {
5967 4 : rc = memcmp(bdev_io->u.bdev.md_buf,
5968 2 : parent_io->u.bdev.md_buf,
5969 2 : spdk_bdev_get_md_size(bdev));
5970 2 : }
5971 :
5972 11 : spdk_bdev_free_io(bdev_io);
5973 :
5974 11 : if (rc == 0) {
5975 5 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
5976 5 : parent_io->internal.cb(parent_io, true, parent_io->internal.caller_ctx);
5977 5 : } else {
5978 6 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_MISCOMPARE;
5979 6 : parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
5980 : }
5981 11 : }
5982 :
5983 : static void
5984 11 : bdev_compare_do_read(void *_bdev_io)
5985 : {
5986 11 : struct spdk_bdev_io *bdev_io = _bdev_io;
5987 : int rc;
5988 :
5989 22 : rc = spdk_bdev_read_blocks(bdev_io->internal.desc,
5990 11 : spdk_io_channel_from_ctx(bdev_io->internal.ch), NULL,
5991 11 : bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
5992 11 : bdev_compare_do_read_done, bdev_io);
5993 :
5994 11 : if (rc == -ENOMEM) {
5995 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_compare_do_read);
5996 11 : } else if (rc != 0) {
5997 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
5998 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
5999 0 : }
6000 11 : }
6001 :
6002 : static int
6003 16 : bdev_comparev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6004 : struct iovec *iov, int iovcnt, void *md_buf,
6005 : uint64_t offset_blocks, uint64_t num_blocks,
6006 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6007 : {
6008 16 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6009 : struct spdk_bdev_io *bdev_io;
6010 16 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6011 :
6012 16 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6013 0 : return -EINVAL;
6014 : }
6015 :
6016 16 : bdev_io = bdev_channel_get_io(channel);
6017 16 : if (!bdev_io) {
6018 0 : return -ENOMEM;
6019 : }
6020 :
6021 16 : bdev_io->internal.ch = channel;
6022 16 : bdev_io->internal.desc = desc;
6023 16 : bdev_io->type = SPDK_BDEV_IO_TYPE_COMPARE;
6024 16 : bdev_io->u.bdev.iovs = iov;
6025 16 : bdev_io->u.bdev.iovcnt = iovcnt;
6026 16 : bdev_io->u.bdev.md_buf = md_buf;
6027 16 : bdev_io->u.bdev.num_blocks = num_blocks;
6028 16 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6029 16 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6030 16 : bdev_io->u.bdev.memory_domain = NULL;
6031 16 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6032 16 : bdev_io->u.bdev.accel_sequence = NULL;
6033 :
6034 16 : if (bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE)) {
6035 7 : bdev_io_submit(bdev_io);
6036 7 : return 0;
6037 : }
6038 :
6039 9 : bdev_compare_do_read(bdev_io);
6040 :
6041 9 : return 0;
6042 16 : }
6043 :
6044 : int
6045 10 : spdk_bdev_comparev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6046 : struct iovec *iov, int iovcnt,
6047 : uint64_t offset_blocks, uint64_t num_blocks,
6048 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6049 : {
6050 20 : return bdev_comparev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
6051 10 : num_blocks, cb, cb_arg);
6052 : }
6053 :
6054 : int
6055 6 : spdk_bdev_comparev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6056 : struct iovec *iov, int iovcnt, void *md_buf,
6057 : uint64_t offset_blocks, uint64_t num_blocks,
6058 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6059 : {
6060 6 : if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
6061 0 : return -EINVAL;
6062 : }
6063 :
6064 6 : if (md_buf && !_is_buf_allocated(iov)) {
6065 0 : return -EINVAL;
6066 : }
6067 :
6068 12 : return bdev_comparev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
6069 6 : num_blocks, cb, cb_arg);
6070 6 : }
6071 :
6072 : static int
6073 4 : bdev_compare_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6074 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
6075 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6076 : {
6077 4 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6078 : struct spdk_bdev_io *bdev_io;
6079 4 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6080 :
6081 4 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6082 0 : return -EINVAL;
6083 : }
6084 :
6085 4 : bdev_io = bdev_channel_get_io(channel);
6086 4 : if (!bdev_io) {
6087 0 : return -ENOMEM;
6088 : }
6089 :
6090 4 : bdev_io->internal.ch = channel;
6091 4 : bdev_io->internal.desc = desc;
6092 4 : bdev_io->type = SPDK_BDEV_IO_TYPE_COMPARE;
6093 4 : bdev_io->u.bdev.iovs = &bdev_io->iov;
6094 4 : bdev_io->u.bdev.iovs[0].iov_base = buf;
6095 4 : bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
6096 4 : bdev_io->u.bdev.iovcnt = 1;
6097 4 : bdev_io->u.bdev.md_buf = md_buf;
6098 4 : bdev_io->u.bdev.num_blocks = num_blocks;
6099 4 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6100 4 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6101 4 : bdev_io->u.bdev.memory_domain = NULL;
6102 4 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6103 4 : bdev_io->u.bdev.accel_sequence = NULL;
6104 :
6105 4 : if (bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE)) {
6106 2 : bdev_io_submit(bdev_io);
6107 2 : return 0;
6108 : }
6109 :
6110 2 : bdev_compare_do_read(bdev_io);
6111 :
6112 2 : return 0;
6113 4 : }
6114 :
6115 : int
6116 4 : spdk_bdev_compare_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6117 : void *buf, uint64_t offset_blocks, uint64_t num_blocks,
6118 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6119 : {
6120 8 : return bdev_compare_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks,
6121 4 : cb, cb_arg);
6122 : }
6123 :
6124 : int
6125 0 : spdk_bdev_compare_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6126 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
6127 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6128 : {
6129 0 : struct iovec iov = {
6130 0 : .iov_base = buf,
6131 : };
6132 :
6133 0 : if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
6134 0 : return -EINVAL;
6135 : }
6136 :
6137 0 : if (md_buf && !_is_buf_allocated(&iov)) {
6138 0 : return -EINVAL;
6139 : }
6140 :
6141 0 : return bdev_compare_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
6142 0 : cb, cb_arg);
6143 0 : }
6144 :
6145 : static void
6146 2 : bdev_comparev_and_writev_blocks_unlocked(struct lba_range *range, void *ctx, int unlock_status)
6147 : {
6148 2 : struct spdk_bdev_io *bdev_io = ctx;
6149 :
6150 2 : if (unlock_status) {
6151 0 : SPDK_ERRLOG("LBA range unlock failed\n");
6152 0 : }
6153 :
6154 4 : bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS ? true :
6155 2 : false, bdev_io->internal.caller_ctx);
6156 2 : }
6157 :
6158 : static void
6159 2 : bdev_comparev_and_writev_blocks_unlock(struct spdk_bdev_io *bdev_io, int status)
6160 : {
6161 2 : bdev_io->internal.status = status;
6162 :
6163 4 : bdev_unlock_lba_range(bdev_io->internal.desc, spdk_io_channel_from_ctx(bdev_io->internal.ch),
6164 2 : bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
6165 2 : bdev_comparev_and_writev_blocks_unlocked, bdev_io);
6166 2 : }
6167 :
6168 : static void
6169 1 : bdev_compare_and_write_do_write_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
6170 : {
6171 1 : struct spdk_bdev_io *parent_io = cb_arg;
6172 :
6173 1 : if (!success) {
6174 0 : SPDK_ERRLOG("Compare and write operation failed\n");
6175 0 : }
6176 :
6177 1 : spdk_bdev_free_io(bdev_io);
6178 :
6179 2 : bdev_comparev_and_writev_blocks_unlock(parent_io,
6180 1 : success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
6181 1 : }
6182 :
6183 : static void
6184 1 : bdev_compare_and_write_do_write(void *_bdev_io)
6185 : {
6186 1 : struct spdk_bdev_io *bdev_io = _bdev_io;
6187 : int rc;
6188 :
6189 2 : rc = spdk_bdev_writev_blocks(bdev_io->internal.desc,
6190 1 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
6191 1 : bdev_io->u.bdev.fused_iovs, bdev_io->u.bdev.fused_iovcnt,
6192 1 : bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
6193 1 : bdev_compare_and_write_do_write_done, bdev_io);
6194 :
6195 :
6196 1 : if (rc == -ENOMEM) {
6197 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_compare_and_write_do_write);
6198 1 : } else if (rc != 0) {
6199 0 : bdev_comparev_and_writev_blocks_unlock(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
6200 0 : }
6201 1 : }
6202 :
6203 : static void
6204 2 : bdev_compare_and_write_do_compare_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
6205 : {
6206 2 : struct spdk_bdev_io *parent_io = cb_arg;
6207 :
6208 2 : spdk_bdev_free_io(bdev_io);
6209 :
6210 2 : if (!success) {
6211 1 : bdev_comparev_and_writev_blocks_unlock(parent_io, SPDK_BDEV_IO_STATUS_MISCOMPARE);
6212 1 : return;
6213 : }
6214 :
6215 1 : bdev_compare_and_write_do_write(parent_io);
6216 2 : }
6217 :
6218 : static void
6219 2 : bdev_compare_and_write_do_compare(void *_bdev_io)
6220 : {
6221 2 : struct spdk_bdev_io *bdev_io = _bdev_io;
6222 : int rc;
6223 :
6224 4 : rc = spdk_bdev_comparev_blocks(bdev_io->internal.desc,
6225 2 : spdk_io_channel_from_ctx(bdev_io->internal.ch), bdev_io->u.bdev.iovs,
6226 2 : bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
6227 2 : bdev_compare_and_write_do_compare_done, bdev_io);
6228 :
6229 2 : if (rc == -ENOMEM) {
6230 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_compare_and_write_do_compare);
6231 2 : } else if (rc != 0) {
6232 0 : bdev_comparev_and_writev_blocks_unlock(bdev_io, SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED);
6233 0 : }
6234 2 : }
6235 :
6236 : static void
6237 2 : bdev_comparev_and_writev_blocks_locked(struct lba_range *range, void *ctx, int status)
6238 : {
6239 2 : struct spdk_bdev_io *bdev_io = ctx;
6240 :
6241 2 : if (status) {
6242 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED;
6243 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
6244 0 : return;
6245 : }
6246 :
6247 2 : bdev_compare_and_write_do_compare(bdev_io);
6248 2 : }
6249 :
6250 : int
6251 2 : spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6252 : struct iovec *compare_iov, int compare_iovcnt,
6253 : struct iovec *write_iov, int write_iovcnt,
6254 : uint64_t offset_blocks, uint64_t num_blocks,
6255 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6256 : {
6257 2 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6258 : struct spdk_bdev_io *bdev_io;
6259 2 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6260 :
6261 2 : if (!desc->write) {
6262 0 : return -EBADF;
6263 : }
6264 :
6265 2 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6266 0 : return -EINVAL;
6267 : }
6268 :
6269 2 : if (num_blocks > bdev->acwu) {
6270 0 : return -EINVAL;
6271 : }
6272 :
6273 2 : bdev_io = bdev_channel_get_io(channel);
6274 2 : if (!bdev_io) {
6275 0 : return -ENOMEM;
6276 : }
6277 :
6278 2 : bdev_io->internal.ch = channel;
6279 2 : bdev_io->internal.desc = desc;
6280 2 : bdev_io->type = SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE;
6281 2 : bdev_io->u.bdev.iovs = compare_iov;
6282 2 : bdev_io->u.bdev.iovcnt = compare_iovcnt;
6283 2 : bdev_io->u.bdev.fused_iovs = write_iov;
6284 2 : bdev_io->u.bdev.fused_iovcnt = write_iovcnt;
6285 2 : bdev_io->u.bdev.md_buf = NULL;
6286 2 : bdev_io->u.bdev.num_blocks = num_blocks;
6287 2 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6288 2 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6289 2 : bdev_io->u.bdev.memory_domain = NULL;
6290 2 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6291 2 : bdev_io->u.bdev.accel_sequence = NULL;
6292 :
6293 2 : if (bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE)) {
6294 0 : bdev_io_submit(bdev_io);
6295 0 : return 0;
6296 : }
6297 :
6298 4 : return bdev_lock_lba_range(desc, ch, offset_blocks, num_blocks,
6299 2 : bdev_comparev_and_writev_blocks_locked, bdev_io);
6300 2 : }
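A hedged usage sketch of the compare-and-write path above; buffers, callback, and block size are placeholders, and num_blocks must not exceed the bdev's atomic compare & write unit (acwu).

struct iovec cmp_iov = { .iov_base = expected_buf, .iov_len = blocklen };
struct iovec wr_iov  = { .iov_base = new_buf,      .iov_len = blocklen };

/* When the module lacks native COMPARE_AND_WRITE, the bdev layer emulates it:
 * lock the LBA range, compare (reading if needed), write on a match, unlock.
 * A mismatch completes with SPDK_BDEV_IO_STATUS_MISCOMPARE. */
rc = spdk_bdev_comparev_and_writev_blocks(desc, io_ch, &cmp_iov, 1, &wr_iov, 1,
					  offset_blocks, 1, caw_done_cb, NULL);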
6301 :
6302 : int
6303 2 : spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6304 : struct iovec *iov, int iovcnt,
6305 : uint64_t offset_blocks, uint64_t num_blocks,
6306 : bool populate,
6307 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6308 : {
6309 2 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6310 : struct spdk_bdev_io *bdev_io;
6311 2 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6312 :
6313 2 : if (!desc->write) {
6314 0 : return -EBADF;
6315 : }
6316 :
6317 2 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6318 0 : return -EINVAL;
6319 : }
6320 :
6321 2 : if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY)) {
6322 0 : return -ENOTSUP;
6323 : }
6324 :
6325 2 : bdev_io = bdev_channel_get_io(channel);
6326 2 : if (!bdev_io) {
6327 0 : return -ENOMEM;
6328 : }
6329 :
6330 2 : bdev_io->internal.ch = channel;
6331 2 : bdev_io->internal.desc = desc;
6332 2 : bdev_io->type = SPDK_BDEV_IO_TYPE_ZCOPY;
6333 2 : bdev_io->u.bdev.num_blocks = num_blocks;
6334 2 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6335 2 : bdev_io->u.bdev.iovs = iov;
6336 2 : bdev_io->u.bdev.iovcnt = iovcnt;
6337 2 : bdev_io->u.bdev.md_buf = NULL;
6338 2 : bdev_io->u.bdev.zcopy.populate = populate ? 1 : 0;
6339 2 : bdev_io->u.bdev.zcopy.commit = 0;
6340 2 : bdev_io->u.bdev.zcopy.start = 1;
6341 2 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6342 2 : bdev_io->u.bdev.memory_domain = NULL;
6343 2 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6344 2 : bdev_io->u.bdev.accel_sequence = NULL;
6345 :
6346 2 : bdev_io_submit(bdev_io);
6347 :
6348 2 : return 0;
6349 2 : }
6350 :
6351 : int
6352 2 : spdk_bdev_zcopy_end(struct spdk_bdev_io *bdev_io, bool commit,
6353 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6354 : {
6355 2 : if (bdev_io->type != SPDK_BDEV_IO_TYPE_ZCOPY) {
6356 0 : return -EINVAL;
6357 : }
6358 :
6359 2 : bdev_io->u.bdev.zcopy.commit = commit ? 1 : 0;
6360 2 : bdev_io->u.bdev.zcopy.start = 0;
6361 2 : bdev_io->internal.caller_ctx = cb_arg;
6362 2 : bdev_io->internal.cb = cb;
6363 2 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
6364 :
6365 2 : bdev_io_submit(bdev_io);
6366 :
6367 2 : return 0;
6368 2 : }
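A hedged sketch of the two-phase zero-copy flow above; it assumes the bdev reports SPDK_BDEV_IO_TYPE_ZCOPY support, and the callback names are placeholders.

/* Phase 1: expose the bdev's internal buffers for these blocks; populate=true
 * fills them with current data (read-like semantics). */
rc = spdk_bdev_zcopy_start(desc, io_ch, NULL, 0, offset_blocks, num_blocks,
			   true, zcopy_start_done, NULL);

/* Phase 2: inside zcopy_start_done(), after using the buffers returned by
 * spdk_bdev_io_get_iovec(), finish the operation; commit=true persists any
 * modifications made to those buffers. */
rc = spdk_bdev_zcopy_end(bdev_io, true, zcopy_end_done, NULL);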
6369 :
6370 : int
6371 0 : spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6372 : uint64_t offset, uint64_t len,
6373 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6374 : {
6375 : uint64_t offset_blocks, num_blocks;
6376 :
6377 0 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, len, &num_blocks) != 0) {
6378 0 : return -EINVAL;
6379 : }
6380 :
6381 0 : return spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
6382 0 : }
6383 :
6384 : int
6385 33 : spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6386 : uint64_t offset_blocks, uint64_t num_blocks,
6387 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6388 : {
6389 33 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6390 : struct spdk_bdev_io *bdev_io;
6391 33 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6392 :
6393 33 : if (!desc->write) {
6394 0 : return -EBADF;
6395 : }
6396 :
6397 33 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6398 0 : return -EINVAL;
6399 : }
6400 :
6401 33 : if (!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES) &&
6402 10 : !bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE)) {
6403 1 : return -ENOTSUP;
6404 : }
6405 :
6406 32 : bdev_io = bdev_channel_get_io(channel);
6407 :
6408 32 : if (!bdev_io) {
6409 0 : return -ENOMEM;
6410 : }
6411 :
6412 32 : bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
6413 32 : bdev_io->internal.ch = channel;
6414 32 : bdev_io->internal.desc = desc;
6415 32 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6416 32 : bdev_io->u.bdev.num_blocks = num_blocks;
6417 32 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6418 32 : bdev_io->u.bdev.memory_domain = NULL;
6419 32 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6420 32 : bdev_io->u.bdev.accel_sequence = NULL;
6421 :
 6422 :     /* If the write_zeroes size is large and should be split, use the generic split
 6423 :      * logic regardless of whether SPDK_BDEV_IO_TYPE_WRITE_ZEROES is supported or not.
 6424 :      *
 6425 :      * Then, send the write_zeroes request if SPDK_BDEV_IO_TYPE_WRITE_ZEROES is supported,
 6426 :      * or emulate it using regular write requests otherwise.
 6427 :      */
6428 32 : if (bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES) ||
6429 9 : bdev_io->internal.f.split) {
6430 26 : bdev_io_submit(bdev_io);
6431 26 : return 0;
6432 : }
6433 :
6434 6 : assert(_bdev_get_block_size_with_md(bdev) <= ZERO_BUFFER_SIZE);
6435 :
6436 6 : return bdev_write_zero_buffer(bdev_io);
6437 33 : }
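A hedged usage sketch of the API above; desc, io_ch, and the completion callback are placeholders.

/* Zero the first 1 MiB of a bdev with 4 KiB blocks (256 blocks). If the module
 * lacks WRITE_ZEROES support, the bdev layer emulates it with regular writes
 * from an internal zero buffer, as described in the comment above. */
rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 0, (1024 * 1024) / 4096,
				   write_zeroes_done, NULL);
if (rc == -ENOMEM) {
	/* Out of bdev_io objects; retry later via spdk_bdev_queue_io_wait(). */
}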
6438 :
6439 : int
6440 0 : spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6441 : uint64_t offset, uint64_t nbytes,
6442 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6443 : {
6444 : uint64_t offset_blocks, num_blocks;
6445 :
6446 0 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
6447 0 : return -EINVAL;
6448 : }
6449 :
6450 0 : return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
6451 0 : }
6452 :
6453 : static void
6454 0 : bdev_io_complete_cb(void *ctx)
6455 : {
6456 0 : struct spdk_bdev_io *bdev_io = ctx;
6457 :
6458 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
6459 0 : bdev_io->internal.cb(bdev_io, true, bdev_io->internal.caller_ctx);
6460 0 : }
6461 :
6462 : int
6463 22 : spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6464 : uint64_t offset_blocks, uint64_t num_blocks,
6465 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6466 : {
6467 22 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6468 : struct spdk_bdev_io *bdev_io;
6469 22 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6470 :
6471 22 : if (!desc->write) {
6472 0 : return -EBADF;
6473 : }
6474 :
6475 22 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6476 0 : return -EINVAL;
6477 : }
6478 :
6479 22 : bdev_io = bdev_channel_get_io(channel);
6480 22 : if (!bdev_io) {
6481 0 : return -ENOMEM;
6482 : }
6483 :
6484 22 : bdev_io->internal.ch = channel;
6485 22 : bdev_io->internal.desc = desc;
6486 22 : bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
6487 :
6488 22 : bdev_io->u.bdev.iovs = &bdev_io->iov;
6489 22 : bdev_io->u.bdev.iovs[0].iov_base = NULL;
6490 22 : bdev_io->u.bdev.iovs[0].iov_len = 0;
6491 22 : bdev_io->u.bdev.iovcnt = 1;
6492 :
6493 22 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6494 22 : bdev_io->u.bdev.num_blocks = num_blocks;
6495 22 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6496 22 : bdev_io->u.bdev.memory_domain = NULL;
6497 22 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6498 22 : bdev_io->u.bdev.accel_sequence = NULL;
6499 :
6500 22 : if (num_blocks == 0) {
6501 0 : spdk_thread_send_msg(spdk_get_thread(), bdev_io_complete_cb, bdev_io);
6502 0 : return 0;
6503 : }
6504 :
6505 22 : bdev_io_submit(bdev_io);
6506 22 : return 0;
6507 22 : }
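/* Illustrative sketch, not part of bdev.c: unmapping a block-aligned range.
 * The byte-based spdk_bdev_unmap() above only converts offset/nbytes into
 * blocks before calling this function, so block-aligned callers can use the
 * _blocks variant directly. "example_unmap_done" is a hypothetical callback.
 */
static void
example_unmap_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	spdk_bdev_free_io(bdev_io);
}

static int
example_unmap_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		    uint64_t offset_blocks, uint64_t num_blocks)
{
	/* A zero-length unmap is completed immediately with success through a
	 * deferred message instead of being submitted to the module. */
	return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks,
				      example_unmap_done, NULL);
}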
6508 :
6509 : int
6510 0 : spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6511 : uint64_t offset, uint64_t length,
6512 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6513 : {
6514 : uint64_t offset_blocks, num_blocks;
6515 :
6516 0 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, length, &num_blocks) != 0) {
6517 0 : return -EINVAL;
6518 : }
6519 :
6520 0 : return spdk_bdev_flush_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
6521 0 : }
6522 :
6523 : int
6524 2 : spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6525 : uint64_t offset_blocks, uint64_t num_blocks,
6526 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6527 : {
6528 2 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6529 : struct spdk_bdev_io *bdev_io;
6530 2 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6531 :
6532 2 : if (!desc->write) {
6533 0 : return -EBADF;
6534 : }
6535 :
6536 2 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6537 0 : return -EINVAL;
6538 : }
6539 :
6540 2 : bdev_io = bdev_channel_get_io(channel);
6541 2 : if (!bdev_io) {
6542 0 : return -ENOMEM;
6543 : }
6544 :
6545 2 : bdev_io->internal.ch = channel;
6546 2 : bdev_io->internal.desc = desc;
6547 2 : bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
6548 2 : bdev_io->u.bdev.iovs = NULL;
6549 2 : bdev_io->u.bdev.iovcnt = 0;
6550 2 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6551 2 : bdev_io->u.bdev.num_blocks = num_blocks;
6552 2 : bdev_io->u.bdev.memory_domain = NULL;
6553 2 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6554 2 : bdev_io->u.bdev.accel_sequence = NULL;
6555 2 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6556 :
6557 2 : bdev_io_submit(bdev_io);
6558 2 : return 0;
6559 2 : }
6560 :
6561 : static int bdev_reset_poll_for_outstanding_io(void *ctx);
6562 :
6563 : static void
6564 13 : bdev_reset_check_outstanding_io_done(struct spdk_bdev *bdev, void *_ctx, int status)
6565 : {
6566 13 : struct spdk_bdev_channel *ch = _ctx;
6567 : struct spdk_bdev_io *bdev_io;
6568 :
6569 13 : bdev_io = TAILQ_FIRST(&ch->queued_resets);
6570 :
6571 13 : if (status == -EBUSY) {
6572 9 : if (spdk_get_ticks() < bdev_io->u.reset.wait_poller.stop_time_tsc) {
6573 8 : bdev_io->u.reset.wait_poller.poller = SPDK_POLLER_REGISTER(bdev_reset_poll_for_outstanding_io,
6574 : ch, BDEV_RESET_CHECK_OUTSTANDING_IO_PERIOD);
6575 8 : } else {
6576 1 : TAILQ_REMOVE(&ch->queued_resets, bdev_io, internal.link);
6577 :
6578 1 : if (TAILQ_EMPTY(&ch->io_memory_domain) && TAILQ_EMPTY(&ch->io_accel_exec)) {
6579 : /* If outstanding IOs are still present and reset_io_drain_timeout
6580 : * seconds passed, start the reset. */
6581 1 : bdev_io_submit_reset(bdev_io);
6582 1 : } else {
6583 : /* We still have in progress memory domain pull/push or we're
6584 : * executing accel sequence. Since we cannot abort either of those
6585 : * operations, fail the reset request. */
6586 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
6587 : }
6588 : }
6589 9 : } else {
6590 4 : TAILQ_REMOVE(&ch->queued_resets, bdev_io, internal.link);
6591 4 : SPDK_DEBUGLOG(bdev,
6592 : "Skipping reset for underlying device of bdev: %s - no outstanding I/O.\n",
6593 : ch->bdev->name);
6594 : /* Mark the completion status as a SUCCESS and complete the reset. */
6595 4 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
6596 : }
6597 13 : }
6598 :
6599 : static void
6600 13 : bdev_reset_check_outstanding_io(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
6601 : struct spdk_io_channel *io_ch, void *_ctx)
6602 : {
6603 13 : struct spdk_bdev_channel *cur_ch = __io_ch_to_bdev_ch(io_ch);
6604 13 : int status = 0;
6605 :
6606 17 : if (cur_ch->io_outstanding > 0 ||
6607 4 : !TAILQ_EMPTY(&cur_ch->io_memory_domain) ||
6608 4 : !TAILQ_EMPTY(&cur_ch->io_accel_exec)) {
6609 : /* If a channel has outstanding IO, set status to -EBUSY code. This will stop
6610 : * further iteration over the rest of the channels and pass non-zero status
6611 : * to the callback function. */
6612 9 : status = -EBUSY;
6613 9 : }
6614 13 : spdk_bdev_for_each_channel_continue(i, status);
6615 13 : }
6616 :
6617 : static int
6618 8 : bdev_reset_poll_for_outstanding_io(void *ctx)
6619 : {
6620 8 : struct spdk_bdev_channel *ch = ctx;
6621 : struct spdk_bdev_io *bdev_io;
6622 :
6623 8 : bdev_io = TAILQ_FIRST(&ch->queued_resets);
6624 :
6625 8 : spdk_poller_unregister(&bdev_io->u.reset.wait_poller.poller);
6626 8 : spdk_bdev_for_each_channel(ch->bdev, bdev_reset_check_outstanding_io, ch,
6627 : bdev_reset_check_outstanding_io_done);
6628 :
6629 8 : return SPDK_POLLER_BUSY;
6630 : }
6631 :
6632 : static void
6633 15 : bdev_reset_freeze_channel_done(struct spdk_bdev *bdev, void *_ctx, int status)
6634 : {
6635 15 : struct spdk_bdev_channel *ch = _ctx;
6636 : struct spdk_bdev_io *bdev_io;
6637 :
6638 15 : bdev_io = TAILQ_FIRST(&ch->queued_resets);
6639 :
6640 15 : if (bdev->reset_io_drain_timeout == 0) {
6641 10 : TAILQ_REMOVE(&ch->queued_resets, bdev_io, internal.link);
6642 :
6643 10 : bdev_io_submit_reset(bdev_io);
6644 10 : return;
6645 : }
6646 :
6647 10 : bdev_io->u.reset.wait_poller.stop_time_tsc = spdk_get_ticks() +
6648 5 : (ch->bdev->reset_io_drain_timeout * spdk_get_ticks_hz());
6649 :
6650 : /* In case bdev->reset_io_drain_timeout is not equal to zero,
6651 : * submit the reset to the underlying module only if outstanding I/O
6652 : * remain after reset_io_drain_timeout seconds have passed. */
6653 5 : spdk_bdev_for_each_channel(ch->bdev, bdev_reset_check_outstanding_io, ch,
6654 : bdev_reset_check_outstanding_io_done);
6655 15 : }
6656 :
6657 : static void
6658 18 : bdev_reset_freeze_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
6659 : struct spdk_io_channel *ch, void *_ctx)
6660 : {
6661 : struct spdk_bdev_channel *channel;
6662 : struct spdk_bdev_mgmt_channel *mgmt_channel;
6663 : struct spdk_bdev_shared_resource *shared_resource;
6664 : bdev_io_tailq_t tmp_queued;
6665 :
6666 18 : TAILQ_INIT(&tmp_queued);
6667 :
6668 18 : channel = __io_ch_to_bdev_ch(ch);
6669 18 : shared_resource = channel->shared_resource;
6670 18 : mgmt_channel = shared_resource->mgmt_ch;
6671 :
6672 18 : channel->flags |= BDEV_CH_RESET_IN_PROGRESS;
6673 :
6674 18 : if ((channel->flags & BDEV_CH_QOS_ENABLED) != 0) {
6675 2 : TAILQ_SWAP(&channel->qos_queued_io, &tmp_queued, spdk_bdev_io, internal.link);
6676 2 : }
6677 :
6678 18 : bdev_abort_all_queued_io(&shared_resource->nomem_io, channel);
6679 18 : bdev_abort_all_buf_io(mgmt_channel, channel);
6680 18 : bdev_abort_all_queued_io(&tmp_queued, channel);
6681 :
6682 18 : spdk_bdev_for_each_channel_continue(i, 0);
6683 18 : }
6684 :
6685 : static void
6686 15 : bdev_start_reset(void *ctx)
6687 : {
6688 15 : struct spdk_bdev_channel *ch = ctx;
6689 :
6690 15 : spdk_bdev_for_each_channel(ch->bdev, bdev_reset_freeze_channel, ch,
6691 : bdev_reset_freeze_channel_done);
6692 15 : }
6693 :
6694 : static void
6695 16 : bdev_channel_start_reset(struct spdk_bdev_channel *ch)
6696 : {
6697 16 : struct spdk_bdev *bdev = ch->bdev;
6698 :
6699 16 : assert(!TAILQ_EMPTY(&ch->queued_resets));
6700 :
6701 16 : spdk_spin_lock(&bdev->internal.spinlock);
6702 16 : if (bdev->internal.reset_in_progress == NULL) {
6703 15 : bdev->internal.reset_in_progress = TAILQ_FIRST(&ch->queued_resets);
6704 : /*
6705 : * Take a channel reference for the target bdev for the life of this
6706 : * reset. This guards against the channel getting destroyed while
6707 : * spdk_bdev_for_each_channel() calls related to this reset IO are in
6708 : * progress. We will release the reference when this reset is
6709 : * completed.
6710 : */
6711 15 : bdev->internal.reset_in_progress->u.reset.ch_ref = spdk_get_io_channel(__bdev_to_io_dev(bdev));
6712 15 : bdev_start_reset(ch);
6713 15 : }
6714 16 : spdk_spin_unlock(&bdev->internal.spinlock);
6715 16 : }
6716 :
6717 : int
6718 16 : spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6719 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6720 : {
6721 16 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6722 : struct spdk_bdev_io *bdev_io;
6723 16 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6724 :
6725 16 : bdev_io = bdev_channel_get_io(channel);
6726 16 : if (!bdev_io) {
6727 0 : return -ENOMEM;
6728 : }
6729 :
6730 16 : bdev_io->internal.ch = channel;
6731 16 : bdev_io->internal.desc = desc;
6732 16 : bdev_io->internal.submit_tsc = spdk_get_ticks();
6733 16 : bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
6734 16 : bdev_io->u.reset.ch_ref = NULL;
6735 16 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6736 :
6737 16 : spdk_spin_lock(&bdev->internal.spinlock);
6738 16 : TAILQ_INSERT_TAIL(&channel->queued_resets, bdev_io, internal.link);
6739 16 : spdk_spin_unlock(&bdev->internal.spinlock);
6740 :
6741 16 : bdev_ch_add_to_io_submitted(bdev_io);
6742 :
6743 16 : bdev_channel_start_reset(channel);
6744 :
6745 16 : return 0;
6746 16 : }
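/* Illustrative sketch, not part of bdev.c: issuing a reset. Freezing the
 * channels, aborting queued I/O and the reset_io_drain_timeout handling seen
 * above all happen internally; the caller only observes the final completion
 * callback. The "example_" names are hypothetical. */
static void
example_reset_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	SPDK_NOTICELOG("reset %s\n", success ? "completed" : "failed");
	spdk_bdev_free_io(bdev_io);
}

static int
example_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch)
{
	return spdk_bdev_reset(desc, ch, example_reset_done, NULL);
}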
6747 :
6748 : void
6749 0 : spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
6750 : struct spdk_bdev_io_stat *stat, enum spdk_bdev_reset_stat_mode reset_mode)
6751 : {
6752 0 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6753 :
6754 0 : bdev_get_io_stat(stat, channel->stat);
6755 0 : spdk_bdev_reset_io_stat(stat, reset_mode);
6756 0 : }
6757 :
6758 : static void
6759 5 : bdev_get_device_stat_done(struct spdk_bdev *bdev, void *_ctx, int status)
6760 : {
6761 5 : struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = _ctx;
6762 :
6763 10 : bdev_iostat_ctx->cb(bdev, bdev_iostat_ctx->stat,
6764 5 : bdev_iostat_ctx->cb_arg, 0);
6765 5 : free(bdev_iostat_ctx);
6766 5 : }
6767 :
6768 : static void
6769 4 : bdev_get_each_channel_stat(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
6770 : struct spdk_io_channel *ch, void *_ctx)
6771 : {
6772 4 : struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = _ctx;
6773 4 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6774 :
6775 4 : spdk_bdev_add_io_stat(bdev_iostat_ctx->stat, channel->stat);
6776 4 : spdk_bdev_reset_io_stat(channel->stat, bdev_iostat_ctx->reset_mode);
6777 4 : spdk_bdev_for_each_channel_continue(i, 0);
6778 4 : }
6779 :
6780 : void
6781 5 : spdk_bdev_get_device_stat(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat,
6782 : enum spdk_bdev_reset_stat_mode reset_mode, spdk_bdev_get_device_stat_cb cb, void *cb_arg)
6783 : {
6784 : struct spdk_bdev_iostat_ctx *bdev_iostat_ctx;
6785 :
6786 5 : assert(bdev != NULL);
6787 5 : assert(stat != NULL);
6788 5 : assert(cb != NULL);
6789 :
6790 5 : bdev_iostat_ctx = calloc(1, sizeof(struct spdk_bdev_iostat_ctx));
6791 5 : if (bdev_iostat_ctx == NULL) {
6792 0 : SPDK_ERRLOG("Unable to allocate memory for spdk_bdev_iostat_ctx\n");
6793 0 : cb(bdev, stat, cb_arg, -ENOMEM);
6794 0 : return;
6795 : }
6796 :
6797 5 : bdev_iostat_ctx->stat = stat;
6798 5 : bdev_iostat_ctx->cb = cb;
6799 5 : bdev_iostat_ctx->cb_arg = cb_arg;
6800 5 : bdev_iostat_ctx->reset_mode = reset_mode;
6801 :
6802 : /* Start with the statistics from previously deleted channels. */
6803 5 : spdk_spin_lock(&bdev->internal.spinlock);
6804 5 : bdev_get_io_stat(bdev_iostat_ctx->stat, bdev->internal.stat);
6805 5 : spdk_bdev_reset_io_stat(bdev->internal.stat, reset_mode);
6806 5 : spdk_spin_unlock(&bdev->internal.spinlock);
6807 :
6808 : /* Then iterate and add the statistics from each existing channel. */
6809 5 : spdk_bdev_for_each_channel(bdev, bdev_get_each_channel_stat, bdev_iostat_ctx,
6810 : bdev_get_device_stat_done);
6811 5 : }
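/* Illustrative sketch, not part of bdev.c: collecting aggregate I/O statistics
 * for a bdev. The caller allocates the stat structure and frees it in the
 * callback once the per-channel iteration above has finished. Passing
 * SPDK_BDEV_RESET_STAT_NONE is assumed to leave the counters untouched after
 * they are read; the "example_" names are hypothetical. */
static void
example_stat_done(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat,
		  void *cb_arg, int rc)
{
	if (rc == 0) {
		SPDK_NOTICELOG("%s: %" PRIu64 " bytes read, %" PRIu64 " bytes written\n",
			       spdk_bdev_get_name(bdev), stat->bytes_read, stat->bytes_written);
	}
	free(stat);
}

static void
example_collect_stat(struct spdk_bdev *bdev)
{
	struct spdk_bdev_io_stat *stat = calloc(1, sizeof(*stat));

	if (stat != NULL) {
		spdk_bdev_get_device_stat(bdev, stat, SPDK_BDEV_RESET_STAT_NONE,
					  example_stat_done, NULL);
	}
}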
6812 :
6813 : struct bdev_iostat_reset_ctx {
6814 : enum spdk_bdev_reset_stat_mode mode;
6815 : bdev_reset_device_stat_cb cb;
6816 : void *cb_arg;
6817 : };
6818 :
6819 : static void
6820 0 : bdev_reset_device_stat_done(struct spdk_bdev *bdev, void *_ctx, int status)
6821 : {
6822 0 : struct bdev_iostat_reset_ctx *ctx = _ctx;
6823 :
6824 0 : ctx->cb(bdev, ctx->cb_arg, 0);
6825 :
6826 0 : free(ctx);
6827 0 : }
6828 :
6829 : static void
6830 0 : bdev_reset_each_channel_stat(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
6831 : struct spdk_io_channel *ch, void *_ctx)
6832 : {
6833 0 : struct bdev_iostat_reset_ctx *ctx = _ctx;
6834 0 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6835 :
6836 0 : spdk_bdev_reset_io_stat(channel->stat, ctx->mode);
6837 :
6838 0 : spdk_bdev_for_each_channel_continue(i, 0);
6839 0 : }
6840 :
6841 : void
6842 0 : bdev_reset_device_stat(struct spdk_bdev *bdev, enum spdk_bdev_reset_stat_mode mode,
6843 : bdev_reset_device_stat_cb cb, void *cb_arg)
6844 : {
6845 : struct bdev_iostat_reset_ctx *ctx;
6846 :
6847 0 : assert(bdev != NULL);
6848 0 : assert(cb != NULL);
6849 :
6850 0 : ctx = calloc(1, sizeof(*ctx));
6851 0 : if (ctx == NULL) {
6852 0 : SPDK_ERRLOG("Unable to allocate bdev_iostat_reset_ctx.\n");
6853 0 : cb(bdev, cb_arg, -ENOMEM);
6854 0 : return;
6855 : }
6856 :
6857 0 : ctx->mode = mode;
6858 0 : ctx->cb = cb;
6859 0 : ctx->cb_arg = cb_arg;
6860 :
6861 0 : spdk_spin_lock(&bdev->internal.spinlock);
6862 0 : spdk_bdev_reset_io_stat(bdev->internal.stat, mode);
6863 0 : spdk_spin_unlock(&bdev->internal.spinlock);
6864 :
6865 0 : spdk_bdev_for_each_channel(bdev,
6866 : bdev_reset_each_channel_stat,
6867 0 : ctx,
6868 : bdev_reset_device_stat_done);
6869 0 : }
6870 :
6871 : int
6872 1 : spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6873 : const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
6874 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6875 : {
6876 1 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6877 : struct spdk_bdev_io *bdev_io;
6878 1 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6879 :
6880 1 : if (!desc->write) {
6881 0 : return -EBADF;
6882 : }
6883 :
6884 1 : if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN))) {
6885 1 : return -ENOTSUP;
6886 : }
6887 :
6888 0 : bdev_io = bdev_channel_get_io(channel);
6889 0 : if (!bdev_io) {
6890 0 : return -ENOMEM;
6891 : }
6892 :
6893 0 : bdev_io->internal.ch = channel;
6894 0 : bdev_io->internal.desc = desc;
6895 0 : bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN;
6896 0 : bdev_io->u.nvme_passthru.cmd = *cmd;
6897 0 : bdev_io->u.nvme_passthru.buf = buf;
6898 0 : bdev_io->u.nvme_passthru.nbytes = nbytes;
6899 0 : bdev_io->u.nvme_passthru.md_buf = NULL;
6900 0 : bdev_io->u.nvme_passthru.md_len = 0;
6901 :
6902 0 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6903 :
6904 0 : bdev_io_submit(bdev_io);
6905 0 : return 0;
6906 1 : }
6907 :
6908 : int
6909 1 : spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6910 : const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
6911 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6912 : {
6913 1 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6914 : struct spdk_bdev_io *bdev_io;
6915 1 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6916 :
6917 1 : if (!desc->write) {
6918 : /*
6919 : * Do not try to parse the NVMe command - we could maybe use bits in the opcode
6920 : * to easily determine if the command is a read or write, but for now just
6921 : * do not allow io_passthru with a read-only descriptor.
6922 : */
6923 0 : return -EBADF;
6924 : }
6925 :
6926 1 : if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO))) {
6927 1 : return -ENOTSUP;
6928 : }
6929 :
6930 0 : bdev_io = bdev_channel_get_io(channel);
6931 0 : if (!bdev_io) {
6932 0 : return -ENOMEM;
6933 : }
6934 :
6935 0 : bdev_io->internal.ch = channel;
6936 0 : bdev_io->internal.desc = desc;
6937 0 : bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO;
6938 0 : bdev_io->u.nvme_passthru.cmd = *cmd;
6939 0 : bdev_io->u.nvme_passthru.buf = buf;
6940 0 : bdev_io->u.nvme_passthru.nbytes = nbytes;
6941 0 : bdev_io->u.nvme_passthru.md_buf = NULL;
6942 0 : bdev_io->u.nvme_passthru.md_len = 0;
6943 :
6944 0 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6945 :
6946 0 : bdev_io_submit(bdev_io);
6947 0 : return 0;
6948 1 : }
6949 :
6950 : int
6951 1 : spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6952 : const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len,
6953 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6954 : {
6955 1 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6956 : struct spdk_bdev_io *bdev_io;
6957 1 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6958 :
6959 1 : if (!desc->write) {
6960 : /*
6961 : * Do not try to parse the NVMe command - we could maybe use bits in the opcode
6962 : * to easily determine if the command is a read or write, but for now just
6963 : * do not allow io_passthru with a read-only descriptor.
6964 : */
6965 0 : return -EBADF;
6966 : }
6967 :
6968 1 : if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO_MD))) {
6969 1 : return -ENOTSUP;
6970 : }
6971 :
6972 0 : bdev_io = bdev_channel_get_io(channel);
6973 0 : if (!bdev_io) {
6974 0 : return -ENOMEM;
6975 : }
6976 :
6977 0 : bdev_io->internal.ch = channel;
6978 0 : bdev_io->internal.desc = desc;
6979 0 : bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO_MD;
6980 0 : bdev_io->u.nvme_passthru.cmd = *cmd;
6981 0 : bdev_io->u.nvme_passthru.buf = buf;
6982 0 : bdev_io->u.nvme_passthru.nbytes = nbytes;
6983 0 : bdev_io->u.nvme_passthru.md_buf = md_buf;
6984 0 : bdev_io->u.nvme_passthru.md_len = md_len;
6985 :
6986 0 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6987 :
6988 0 : bdev_io_submit(bdev_io);
6989 0 : return 0;
6990 1 : }
6991 :
6992 : int
6993 0 : spdk_bdev_nvme_iov_passthru_md(struct spdk_bdev_desc *desc,
6994 : struct spdk_io_channel *ch,
6995 : const struct spdk_nvme_cmd *cmd,
6996 : struct iovec *iov, int iovcnt, size_t nbytes,
6997 : void *md_buf, size_t md_len,
6998 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6999 : {
7000 0 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
7001 : struct spdk_bdev_io *bdev_io;
7002 0 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
7003 :
7004 0 : if (!desc->write) {
7005 : /*
7006 : * Do not try to parse the NVMe command - we could maybe use bits in the opcode
7007 : * to easily determine if the command is a read or write, but for now just
7008 : * do not allow io_passthru with a read-only descriptor.
7009 : */
7010 0 : return -EBADF;
7011 : }
7012 :
7013 0 : if (md_buf && spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO_MD))) {
7014 0 : return -ENOTSUP;
7015 0 : } else if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO))) {
7016 0 : return -ENOTSUP;
7017 : }
7018 :
7019 0 : bdev_io = bdev_channel_get_io(channel);
7020 0 : if (!bdev_io) {
7021 0 : return -ENOMEM;
7022 : }
7023 :
7024 0 : bdev_io->internal.ch = channel;
7025 0 : bdev_io->internal.desc = desc;
7026 0 : bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IOV_MD;
7027 0 : bdev_io->u.nvme_passthru.cmd = *cmd;
7028 0 : bdev_io->u.nvme_passthru.iovs = iov;
7029 0 : bdev_io->u.nvme_passthru.iovcnt = iovcnt;
7030 0 : bdev_io->u.nvme_passthru.nbytes = nbytes;
7031 0 : bdev_io->u.nvme_passthru.md_buf = md_buf;
7032 0 : bdev_io->u.nvme_passthru.md_len = md_len;
7033 :
7034 0 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
7035 :
7036 0 : bdev_io_submit(bdev_io);
7037 0 : return 0;
7038 0 : }
7039 :
7040 : static void bdev_abort_retry(void *ctx);
7041 : static void bdev_abort(struct spdk_bdev_io *parent_io);
7042 :
7043 : static void
7044 22 : bdev_abort_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
7045 : {
7046 22 : struct spdk_bdev_channel *channel = bdev_io->internal.ch;
7047 22 : struct spdk_bdev_io *parent_io = cb_arg;
7048 : struct spdk_bdev_io *bio_to_abort, *tmp_io;
7049 :
7050 22 : bio_to_abort = bdev_io->u.abort.bio_to_abort;
7051 :
7052 22 : spdk_bdev_free_io(bdev_io);
7053 :
7054 22 : if (!success) {
7055 : /* Check if the target I/O completed in the meantime. */
7056 2 : TAILQ_FOREACH(tmp_io, &channel->io_submitted, internal.ch_link) {
7057 1 : if (tmp_io == bio_to_abort) {
7058 0 : break;
7059 : }
7060 1 : }
7061 :
7062 : /* If the target I/O still exists, set the parent to failed. */
7063 1 : if (tmp_io != NULL) {
7064 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7065 0 : }
7066 1 : }
7067 :
7068 22 : assert(parent_io->internal.f.split);
7069 :
7070 22 : parent_io->internal.split.outstanding--;
7071 22 : if (parent_io->internal.split.outstanding == 0) {
7072 16 : if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) {
7073 0 : bdev_abort_retry(parent_io);
7074 0 : } else {
7075 16 : bdev_io_complete(parent_io);
7076 : }
7077 16 : }
7078 22 : }
7079 :
7080 : static int
7081 23 : bdev_abort_io(struct spdk_bdev_desc *desc, struct spdk_bdev_channel *channel,
7082 : struct spdk_bdev_io *bio_to_abort,
7083 : spdk_bdev_io_completion_cb cb, void *cb_arg)
7084 : {
7085 23 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
7086 : struct spdk_bdev_io *bdev_io;
7087 :
7088 23 : if (bio_to_abort->type == SPDK_BDEV_IO_TYPE_ABORT ||
7089 23 : bio_to_abort->type == SPDK_BDEV_IO_TYPE_RESET) {
7090 : /* TODO: Abort reset or abort request. */
7091 0 : return -ENOTSUP;
7092 : }
7093 :
7094 23 : bdev_io = bdev_channel_get_io(channel);
7095 23 : if (bdev_io == NULL) {
7096 1 : return -ENOMEM;
7097 : }
7098 :
7099 22 : bdev_io->internal.ch = channel;
7100 22 : bdev_io->internal.desc = desc;
7101 22 : bdev_io->type = SPDK_BDEV_IO_TYPE_ABORT;
7102 22 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
7103 :
7104 22 : if (bio_to_abort->internal.f.split) {
7105 6 : assert(bdev_io_should_split(bio_to_abort));
7106 6 : bdev_io->u.bdev.abort.bio_cb_arg = bio_to_abort;
7107 :
7108 : /* The parent abort request is not submitted directly, but to manage its
7109 : * execution, add it to the submitted list here.
7110 : */
7111 6 : bdev_io->internal.submit_tsc = spdk_get_ticks();
7112 6 : bdev_ch_add_to_io_submitted(bdev_io);
7113 :
7114 6 : bdev_abort(bdev_io);
7115 :
7116 6 : return 0;
7117 : }
7118 :
7119 16 : bdev_io->u.abort.bio_to_abort = bio_to_abort;
7120 :
7121 : /* Submit the abort request to the underlying bdev module. */
7122 16 : bdev_io_submit(bdev_io);
7123 :
7124 16 : return 0;
7125 23 : }
7126 :
7127 : static bool
7128 46 : bdev_io_on_tailq(struct spdk_bdev_io *bdev_io, bdev_io_tailq_t *tailq)
7129 : {
7130 : struct spdk_bdev_io *iter;
7131 :
7132 46 : TAILQ_FOREACH(iter, tailq, internal.link) {
7133 0 : if (iter == bdev_io) {
7134 0 : return true;
7135 : }
7136 0 : }
7137 :
7138 46 : return false;
7139 46 : }
7140 :
7141 : static uint32_t
7142 18 : _bdev_abort(struct spdk_bdev_io *parent_io)
7143 : {
7144 18 : struct spdk_bdev_desc *desc = parent_io->internal.desc;
7145 18 : struct spdk_bdev_channel *channel = parent_io->internal.ch;
7146 : void *bio_cb_arg;
7147 : struct spdk_bdev_io *bio_to_abort;
7148 : uint32_t matched_ios;
7149 : int rc;
7150 :
7151 18 : bio_cb_arg = parent_io->u.bdev.abort.bio_cb_arg;
7152 :
7153 : /* matched_ios is returned and is kept by the caller.
7154 : *
7155 : * This function is used in two cases: 1) the same cb_arg is used for
7156 : * multiple I/Os, and 2) a single large I/O is split into smaller ones.
7157 : * Incrementing split_outstanding directly here could confuse readers,
7158 : * especially in the 1st case.
7159 : *
7160 : * Completion of an I/O abort is processed only after the stack unwinds, so
7161 : * this trick works as expected.
7162 : */
7163 18 : matched_ios = 0;
7164 18 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
7165 :
7166 105 : TAILQ_FOREACH(bio_to_abort, &channel->io_submitted, internal.ch_link) {
7167 88 : if (bio_to_abort->internal.caller_ctx != bio_cb_arg) {
7168 65 : continue;
7169 : }
7170 :
7171 23 : if (bio_to_abort->internal.submit_tsc > parent_io->internal.submit_tsc) {
7172 : /* Any I/O which was submitted after this abort command should be excluded. */
7173 0 : continue;
7174 : }
7175 :
7176 : /* We can't abort a request that's being pushed/pulled or executed by accel */
7177 23 : if (bdev_io_on_tailq(bio_to_abort, &channel->io_accel_exec) ||
7178 23 : bdev_io_on_tailq(bio_to_abort, &channel->io_memory_domain)) {
7179 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7180 0 : break;
7181 : }
7182 :
7183 23 : rc = bdev_abort_io(desc, channel, bio_to_abort, bdev_abort_io_done, parent_io);
7184 23 : if (rc != 0) {
7185 1 : if (rc == -ENOMEM) {
7186 1 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_NOMEM;
7187 1 : } else {
7188 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7189 : }
7190 1 : break;
7191 : }
7192 22 : matched_ios++;
7193 22 : }
7194 :
7195 18 : return matched_ios;
7196 : }
7197 :
7198 : static void
7199 1 : bdev_abort_retry(void *ctx)
7200 : {
7201 1 : struct spdk_bdev_io *parent_io = ctx;
7202 : uint32_t matched_ios;
7203 :
7204 1 : matched_ios = _bdev_abort(parent_io);
7205 :
7206 1 : if (matched_ios == 0) {
7207 0 : if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) {
7208 0 : bdev_queue_io_wait_with_cb(parent_io, bdev_abort_retry);
7209 0 : } else {
7210 : /* On retry, finding no target I/O is a success because it means
7211 : * the target I/Os completed in the meantime.
7212 : */
7213 0 : bdev_io_complete(parent_io);
7214 : }
7215 0 : return;
7216 : }
7217 :
7218 : /* Use split_outstanding to manage the progress of aborting I/Os. */
7219 1 : parent_io->internal.f.split = true;
7220 1 : parent_io->internal.split.outstanding = matched_ios;
7221 1 : }
7222 :
7223 : static void
7224 17 : bdev_abort(struct spdk_bdev_io *parent_io)
7225 : {
7226 : uint32_t matched_ios;
7227 :
7228 17 : matched_ios = _bdev_abort(parent_io);
7229 :
7230 17 : if (matched_ios == 0) {
7231 2 : if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) {
7232 1 : bdev_queue_io_wait_with_cb(parent_io, bdev_abort_retry);
7233 1 : } else {
7234 : /* The case the no target I/O was found is failure. */
7235 1 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7236 1 : bdev_io_complete(parent_io);
7237 : }
7238 2 : return;
7239 : }
7240 :
7241 : /* Use split_outstanding to manage the progress of aborting I/Os. */
7242 15 : parent_io->internal.f.split = true;
7243 15 : parent_io->internal.split.outstanding = matched_ios;
7244 17 : }
7245 :
7246 : int
7247 12 : spdk_bdev_abort(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
7248 : void *bio_cb_arg,
7249 : spdk_bdev_io_completion_cb cb, void *cb_arg)
7250 : {
7251 12 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
7252 12 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
7253 : struct spdk_bdev_io *bdev_io;
7254 :
7255 12 : if (bio_cb_arg == NULL) {
7256 0 : return -EINVAL;
7257 : }
7258 :
7259 12 : if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ABORT)) {
7260 1 : return -ENOTSUP;
7261 : }
7262 :
7263 11 : bdev_io = bdev_channel_get_io(channel);
7264 11 : if (bdev_io == NULL) {
7265 0 : return -ENOMEM;
7266 : }
7267 :
7268 11 : bdev_io->internal.ch = channel;
7269 11 : bdev_io->internal.desc = desc;
7270 11 : bdev_io->internal.submit_tsc = spdk_get_ticks();
7271 11 : bdev_io->type = SPDK_BDEV_IO_TYPE_ABORT;
7272 11 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
7273 :
7274 11 : bdev_io->u.bdev.abort.bio_cb_arg = bio_cb_arg;
7275 :
7276 : /* Parent abort request is not submitted directly, but to manage its execution,
7277 : * add it to the submitted list here.
7278 : */
7279 11 : bdev_ch_add_to_io_submitted(bdev_io);
7280 :
7281 11 : bdev_abort(bdev_io);
7282 :
7283 11 : return 0;
7284 12 : }
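/* Illustrative sketch, not part of bdev.c: aborting every I/O that was
 * submitted on this channel with a given cb_arg. Matching is done by the
 * cb_arg pointer passed to the original submission calls; if nothing matches
 * on the first attempt, the abort completes as failed (see bdev_abort()
 * above). "example_abort_done" is a hypothetical callback. */
static void
example_abort_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	spdk_bdev_free_io(bdev_io);
}

static int
example_abort_by_ctx(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		     void *io_ctx)
{
	return spdk_bdev_abort(desc, ch, io_ctx, example_abort_done, NULL);
}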
7285 :
7286 : int
7287 4 : spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
7288 : struct spdk_bdev_io_wait_entry *entry)
7289 : {
7290 4 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
7291 4 : struct spdk_bdev_mgmt_channel *mgmt_ch = channel->shared_resource->mgmt_ch;
7292 :
7293 4 : if (bdev != entry->bdev) {
7294 0 : SPDK_ERRLOG("bdevs do not match\n");
7295 0 : return -EINVAL;
7296 : }
7297 :
7298 4 : if (mgmt_ch->per_thread_cache_count > 0) {
7299 0 : SPDK_ERRLOG("Cannot queue io_wait if spdk_bdev_io available in per-thread cache\n");
7300 0 : return -EINVAL;
7301 : }
7302 :
7303 4 : TAILQ_INSERT_TAIL(&mgmt_ch->io_wait_queue, entry, link);
7304 4 : return 0;
7305 4 : }
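/* Illustrative sketch, not part of bdev.c: the usual -ENOMEM retry pattern
 * built on spdk_bdev_queue_io_wait(). The caller embeds a wait entry in its
 * own request context and resubmits once a spdk_bdev_io becomes available.
 * spdk_bdev_flush_blocks() is used here only as an example I/O; the
 * "example_" names are hypothetical. */
struct example_request {
	struct spdk_bdev_desc		*desc;
	struct spdk_io_channel		*ch;
	struct spdk_bdev_io_wait_entry	wait_entry;
};

static void example_retry(void *arg);

static void
example_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	spdk_bdev_free_io(bdev_io);
}

static void
example_submit(struct example_request *req)
{
	int rc;

	rc = spdk_bdev_flush_blocks(req->desc, req->ch, 0, 1, example_io_done, req);
	if (rc == -ENOMEM) {
		req->wait_entry.bdev = spdk_bdev_desc_get_bdev(req->desc);
		req->wait_entry.cb_fn = example_retry;
		req->wait_entry.cb_arg = req;
		/* Queueing is only valid right after an -ENOMEM return, when
		 * the per-thread spdk_bdev_io cache is known to be empty. */
		spdk_bdev_queue_io_wait(req->wait_entry.bdev, req->ch, &req->wait_entry);
	}
}

static void
example_retry(void *arg)
{
	example_submit(arg);
}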
7306 :
7307 : static inline void
7308 610 : bdev_io_update_io_stat(struct spdk_bdev_io *bdev_io, uint64_t tsc_diff)
7309 : {
7310 610 : enum spdk_bdev_io_status io_status = bdev_io->internal.status;
7311 610 : struct spdk_bdev_io_stat *io_stat = bdev_io->internal.ch->stat;
7312 610 : uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
7313 610 : uint32_t blocklen = bdev_io->bdev->blocklen;
7314 :
7315 610 : if (spdk_likely(io_status == SPDK_BDEV_IO_STATUS_SUCCESS)) {
7316 516 : switch (bdev_io->type) {
7317 : case SPDK_BDEV_IO_TYPE_READ:
7318 321 : io_stat->bytes_read += num_blocks * blocklen;
7319 321 : io_stat->num_read_ops++;
7320 321 : io_stat->read_latency_ticks += tsc_diff;
7321 321 : if (io_stat->max_read_latency_ticks < tsc_diff) {
7322 7 : io_stat->max_read_latency_ticks = tsc_diff;
7323 7 : }
7324 321 : if (io_stat->min_read_latency_ticks > tsc_diff) {
7325 42 : io_stat->min_read_latency_ticks = tsc_diff;
7326 42 : }
7327 321 : break;
7328 : case SPDK_BDEV_IO_TYPE_WRITE:
7329 75 : io_stat->bytes_written += num_blocks * blocklen;
7330 75 : io_stat->num_write_ops++;
7331 75 : io_stat->write_latency_ticks += tsc_diff;
7332 75 : if (io_stat->max_write_latency_ticks < tsc_diff) {
7333 4 : io_stat->max_write_latency_ticks = tsc_diff;
7334 4 : }
7335 75 : if (io_stat->min_write_latency_ticks > tsc_diff) {
7336 25 : io_stat->min_write_latency_ticks = tsc_diff;
7337 25 : }
7338 75 : break;
7339 : case SPDK_BDEV_IO_TYPE_UNMAP:
7340 20 : io_stat->bytes_unmapped += num_blocks * blocklen;
7341 20 : io_stat->num_unmap_ops++;
7342 20 : io_stat->unmap_latency_ticks += tsc_diff;
7343 20 : if (io_stat->max_unmap_latency_ticks < tsc_diff) {
7344 0 : io_stat->max_unmap_latency_ticks = tsc_diff;
7345 0 : }
7346 20 : if (io_stat->min_unmap_latency_ticks > tsc_diff) {
7347 3 : io_stat->min_unmap_latency_ticks = tsc_diff;
7348 3 : }
7349 20 : break;
7350 : case SPDK_BDEV_IO_TYPE_ZCOPY:
7351 : /* Track the data in the start phase only */
7352 4 : if (bdev_io->u.bdev.zcopy.start) {
7353 2 : if (bdev_io->u.bdev.zcopy.populate) {
7354 1 : io_stat->bytes_read += num_blocks * blocklen;
7355 1 : io_stat->num_read_ops++;
7356 1 : io_stat->read_latency_ticks += tsc_diff;
7357 1 : if (io_stat->max_read_latency_ticks < tsc_diff) {
7358 0 : io_stat->max_read_latency_ticks = tsc_diff;
7359 0 : }
7360 1 : if (io_stat->min_read_latency_ticks > tsc_diff) {
7361 1 : io_stat->min_read_latency_ticks = tsc_diff;
7362 1 : }
7363 1 : } else {
7364 1 : io_stat->bytes_written += num_blocks * blocklen;
7365 1 : io_stat->num_write_ops++;
7366 1 : io_stat->write_latency_ticks += tsc_diff;
7367 1 : if (io_stat->max_write_latency_ticks < tsc_diff) {
7368 0 : io_stat->max_write_latency_ticks = tsc_diff;
7369 0 : }
7370 1 : if (io_stat->min_write_latency_ticks > tsc_diff) {
7371 1 : io_stat->min_write_latency_ticks = tsc_diff;
7372 1 : }
7373 : }
7374 2 : }
7375 4 : break;
7376 : case SPDK_BDEV_IO_TYPE_COPY:
7377 21 : io_stat->bytes_copied += num_blocks * blocklen;
7378 21 : io_stat->num_copy_ops++;
7379 21 : bdev_io->internal.ch->stat->copy_latency_ticks += tsc_diff;
7380 21 : if (io_stat->max_copy_latency_ticks < tsc_diff) {
7381 0 : io_stat->max_copy_latency_ticks = tsc_diff;
7382 0 : }
7383 21 : if (io_stat->min_copy_latency_ticks > tsc_diff) {
7384 4 : io_stat->min_copy_latency_ticks = tsc_diff;
7385 4 : }
7386 21 : break;
7387 : default:
7388 75 : break;
7389 : }
7390 610 : } else if (io_status <= SPDK_BDEV_IO_STATUS_FAILED && io_status >= SPDK_MIN_BDEV_IO_STATUS) {
7391 94 : io_stat = bdev_io->bdev->internal.stat;
7392 94 : assert(io_stat->io_error != NULL);
7393 :
7394 94 : spdk_spin_lock(&bdev_io->bdev->internal.spinlock);
7395 94 : io_stat->io_error->error_status[-io_status - 1]++;
7396 94 : spdk_spin_unlock(&bdev_io->bdev->internal.spinlock);
7397 94 : }
7398 :
7399 : #ifdef SPDK_CONFIG_VTUNE
7400 : uint64_t now_tsc = spdk_get_ticks();
7401 : if (now_tsc > (bdev_io->internal.ch->start_tsc + bdev_io->internal.ch->interval_tsc)) {
7402 : uint64_t data[5];
7403 : struct spdk_bdev_io_stat *prev_stat = bdev_io->internal.ch->prev_stat;
7404 :
7405 : data[0] = io_stat->num_read_ops - prev_stat->num_read_ops;
7406 : data[1] = io_stat->bytes_read - prev_stat->bytes_read;
7407 : data[2] = io_stat->num_write_ops - prev_stat->num_write_ops;
7408 : data[3] = io_stat->bytes_written - prev_stat->bytes_written;
7409 : data[4] = bdev_io->bdev->fn_table->get_spin_time ?
7410 : bdev_io->bdev->fn_table->get_spin_time(spdk_bdev_io_get_io_channel(bdev_io)) : 0;
7411 :
7412 : __itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_io->internal.ch->handle,
7413 : __itt_metadata_u64, 5, data);
7414 :
7415 : memcpy(prev_stat, io_stat, sizeof(struct spdk_bdev_io_stat));
7416 : bdev_io->internal.ch->start_tsc = now_tsc;
7417 : }
7418 : #endif
7419 610 : }
7420 :
7421 : static inline void
7422 610 : _bdev_io_complete(void *ctx)
7423 : {
7424 610 : struct spdk_bdev_io *bdev_io = ctx;
7425 :
7426 610 : if (spdk_unlikely(bdev_io_use_accel_sequence(bdev_io))) {
7427 0 : assert(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_SUCCESS);
7428 0 : spdk_accel_sequence_abort(bdev_io->internal.accel_sequence);
7429 0 : }
7430 :
7431 610 : assert(bdev_io->internal.cb != NULL);
7432 610 : assert(spdk_get_thread() == spdk_bdev_io_get_thread(bdev_io));
7433 :
7434 1220 : bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
7435 610 : bdev_io->internal.caller_ctx);
7436 610 : }
7437 :
7438 : static inline void
7439 618 : bdev_io_complete(void *ctx)
7440 : {
7441 618 : struct spdk_bdev_io *bdev_io = ctx;
7442 618 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
7443 : uint64_t tsc, tsc_diff;
7444 :
7445 618 : if (spdk_unlikely(bdev_io->internal.f.in_submit_request)) {
7446 : /*
7447 : * Defer completion to avoid potential infinite recursion if the
7448 : * user's completion callback issues a new I/O.
7449 : */
7450 16 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
7451 8 : bdev_io_complete, bdev_io);
7452 8 : return;
7453 : }
7454 :
7455 610 : tsc = spdk_get_ticks();
7456 610 : tsc_diff = tsc - bdev_io->internal.submit_tsc;
7457 :
7458 610 : bdev_ch_remove_from_io_submitted(bdev_io);
7459 610 : spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_DONE, bdev_ch->trace_id, 0, (uintptr_t)bdev_io,
7460 : bdev_io->internal.caller_ctx, bdev_ch->queue_depth);
7461 :
7462 610 : if (bdev_ch->histogram) {
7463 4 : if (bdev_io->bdev->internal.histogram_io_type == 0 ||
7464 0 : bdev_io->bdev->internal.histogram_io_type == bdev_io->type) {
7465 : /*
7466 : * Tally all I/O types if the histogram_io_type is set to 0.
7467 : */
7468 4 : spdk_histogram_data_tally(bdev_ch->histogram, tsc_diff);
7469 4 : }
7470 4 : }
7471 :
7472 610 : bdev_io_update_io_stat(bdev_io, tsc_diff);
7473 610 : _bdev_io_complete(bdev_io);
7474 618 : }
7475 :
7476 : /* The difference between this function and bdev_io_complete() is that this should be called to
7477 : * complete IOs that haven't been submitted via bdev_io_submit(), as they weren't added onto the
7478 : * io_submitted list and don't have submit_tsc updated.
7479 : */
7480 : static inline void
7481 0 : bdev_io_complete_unsubmitted(struct spdk_bdev_io *bdev_io)
7482 : {
7483 : /* Since the IO hasn't been submitted it's bound to be failed */
7484 0 : assert(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_SUCCESS);
7485 :
7486 : /* At this point we don't know if the IO is completed from submission context or not, but,
7487 : * since this is an error path, we can always do an spdk_thread_send_msg(). */
7488 0 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
7489 0 : _bdev_io_complete, bdev_io);
7490 0 : }
7491 :
7492 : static void bdev_destroy_cb(void *io_device);
7493 :
7494 : static void
7495 15 : bdev_reset_complete(struct spdk_bdev *bdev, void *_ctx, int status)
7496 : {
7497 15 : struct spdk_bdev_io *bdev_io = _ctx;
7498 :
7499 15 : if (bdev_io->u.reset.ch_ref != NULL) {
7500 15 : spdk_put_io_channel(bdev_io->u.reset.ch_ref);
7501 15 : bdev_io->u.reset.ch_ref = NULL;
7502 15 : }
7503 :
7504 15 : bdev_io_complete(bdev_io);
7505 :
7506 15 : if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING &&
7507 1 : TAILQ_EMPTY(&bdev->internal.open_descs)) {
7508 1 : spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
7509 1 : }
7510 15 : }
7511 :
7512 : static void
7513 18 : bdev_unfreeze_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
7514 : struct spdk_io_channel *_ch, void *_ctx)
7515 : {
7516 18 : struct spdk_bdev_io *bdev_io = _ctx;
7517 18 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
7518 : struct spdk_bdev_io *queued_reset;
7519 :
7520 18 : ch->flags &= ~BDEV_CH_RESET_IN_PROGRESS;
7521 18 : while (!TAILQ_EMPTY(&ch->queued_resets)) {
7522 0 : queued_reset = TAILQ_FIRST(&ch->queued_resets);
7523 0 : TAILQ_REMOVE(&ch->queued_resets, queued_reset, internal.link);
7524 0 : spdk_bdev_io_complete(queued_reset, bdev_io->internal.status);
7525 : }
7526 :
7527 18 : spdk_bdev_for_each_channel_continue(i, 0);
7528 18 : }
7529 :
7530 : static void
7531 0 : bdev_io_complete_sequence_cb(void *ctx, int status)
7532 : {
7533 0 : struct spdk_bdev_io *bdev_io = ctx;
7534 :
7535 : /* u.bdev.accel_sequence should have already been cleared at this point */
7536 0 : assert(bdev_io->u.bdev.accel_sequence == NULL);
7537 0 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
7538 0 : bdev_io->internal.f.has_accel_sequence = false;
7539 :
7540 0 : if (spdk_unlikely(status != 0)) {
7541 0 : SPDK_ERRLOG("Failed to execute accel sequence, status=%d\n", status);
7542 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7543 0 : }
7544 :
7545 0 : bdev_io_complete(bdev_io);
7546 0 : }
7547 :
7548 : void
7549 598 : spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
7550 : {
7551 598 : struct spdk_bdev *bdev = bdev_io->bdev;
7552 598 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
7553 598 : struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
7554 :
7555 598 : if (spdk_unlikely(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_PENDING)) {
7556 0 : SPDK_ERRLOG("Unexpected completion on IO from %s module, status was %s\n",
7557 : spdk_bdev_get_module_name(bdev),
7558 : bdev_io_status_get_string(bdev_io->internal.status));
7559 0 : assert(false);
7560 : }
7561 598 : bdev_io->internal.status = status;
7562 :
7563 598 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) {
7564 16 : bool unlock_channels = false;
7565 :
7566 16 : if (status == SPDK_BDEV_IO_STATUS_NOMEM) {
7567 0 : SPDK_ERRLOG("NOMEM returned for reset\n");
7568 0 : }
7569 16 : spdk_spin_lock(&bdev->internal.spinlock);
7570 16 : if (bdev_io == bdev->internal.reset_in_progress) {
7571 15 : bdev->internal.reset_in_progress = NULL;
7572 15 : unlock_channels = true;
7573 15 : }
7574 16 : spdk_spin_unlock(&bdev->internal.spinlock);
7575 :
7576 16 : if (unlock_channels) {
7577 15 : spdk_bdev_for_each_channel(bdev, bdev_unfreeze_channel, bdev_io,
7578 : bdev_reset_complete);
7579 15 : return;
7580 : }
7581 1 : } else {
7582 582 : bdev_io_decrement_outstanding(bdev_ch, shared_resource);
7583 582 : if (spdk_likely(status == SPDK_BDEV_IO_STATUS_SUCCESS)) {
7584 485 : if (bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io)) {
7585 0 : bdev_io_exec_sequence(bdev_io, bdev_io_complete_sequence_cb);
7586 0 : return;
7587 485 : } else if (spdk_unlikely(bdev_io->internal.f.has_bounce_buf &&
7588 : !bdev_io_use_accel_sequence(bdev_io))) {
7589 26 : _bdev_io_push_bounce_data_buffer(bdev_io,
7590 : _bdev_io_complete_push_bounce_done);
7591 : /* bdev IO will be completed in the callback */
7592 26 : return;
7593 : }
7594 459 : }
7595 :
7596 556 : if (spdk_unlikely(_bdev_io_handle_no_mem(bdev_io, BDEV_IO_RETRY_STATE_SUBMIT))) {
7597 5 : return;
7598 : }
7599 : }
7600 :
7601 552 : bdev_io_complete(bdev_io);
7602 598 : }
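/* Illustrative sketch, not part of bdev.c: how a bdev module typically
 * finishes an I/O from its completion path. The mapping of a backend return
 * code to a bdev status shown here is a hypothetical module's choice, not
 * something mandated by the API. */
static void
example_module_io_done(struct spdk_bdev_io *bdev_io, int backend_rc)
{
	enum spdk_bdev_io_status status;

	if (backend_rc == 0) {
		status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else if (backend_rc == -ENOMEM) {
		/* A NOMEM completion makes the generic layer queue the I/O on
		 * the shared resource's nomem_io list and resubmit it later. */
		status = SPDK_BDEV_IO_STATUS_NOMEM;
	} else {
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(bdev_io, status);
}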
7603 :
7604 : void
7605 0 : spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
7606 : enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
7607 : {
7608 : enum spdk_bdev_io_status status;
7609 :
7610 0 : if (sc == SPDK_SCSI_STATUS_GOOD) {
7611 0 : status = SPDK_BDEV_IO_STATUS_SUCCESS;
7612 0 : } else {
7613 0 : status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
7614 0 : bdev_io->internal.error.scsi.sc = sc;
7615 0 : bdev_io->internal.error.scsi.sk = sk;
7616 0 : bdev_io->internal.error.scsi.asc = asc;
7617 0 : bdev_io->internal.error.scsi.ascq = ascq;
7618 : }
7619 :
7620 0 : spdk_bdev_io_complete(bdev_io, status);
7621 0 : }
7622 :
7623 : void
7624 0 : spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
7625 : int *sc, int *sk, int *asc, int *ascq)
7626 : {
7627 0 : assert(sc != NULL);
7628 0 : assert(sk != NULL);
7629 0 : assert(asc != NULL);
7630 0 : assert(ascq != NULL);
7631 :
7632 0 : switch (bdev_io->internal.status) {
7633 : case SPDK_BDEV_IO_STATUS_SUCCESS:
7634 0 : *sc = SPDK_SCSI_STATUS_GOOD;
7635 0 : *sk = SPDK_SCSI_SENSE_NO_SENSE;
7636 0 : *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
7637 0 : *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
7638 0 : break;
7639 : case SPDK_BDEV_IO_STATUS_NVME_ERROR:
7640 0 : spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
7641 0 : break;
7642 : case SPDK_BDEV_IO_STATUS_MISCOMPARE:
7643 0 : *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
7644 0 : *sk = SPDK_SCSI_SENSE_MISCOMPARE;
7645 0 : *asc = SPDK_SCSI_ASC_MISCOMPARE_DURING_VERIFY_OPERATION;
7646 0 : *ascq = bdev_io->internal.error.scsi.ascq;
7647 0 : break;
7648 : case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
7649 0 : *sc = bdev_io->internal.error.scsi.sc;
7650 0 : *sk = bdev_io->internal.error.scsi.sk;
7651 0 : *asc = bdev_io->internal.error.scsi.asc;
7652 0 : *ascq = bdev_io->internal.error.scsi.ascq;
7653 0 : break;
7654 : default:
7655 0 : *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
7656 0 : *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
7657 0 : *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
7658 0 : *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
7659 0 : break;
7660 : }
7661 0 : }
7662 :
7663 : void
7664 0 : spdk_bdev_io_complete_aio_status(struct spdk_bdev_io *bdev_io, int aio_result)
7665 : {
7666 : enum spdk_bdev_io_status status;
7667 :
7668 0 : if (aio_result == 0) {
7669 0 : status = SPDK_BDEV_IO_STATUS_SUCCESS;
7670 0 : } else {
7671 0 : status = SPDK_BDEV_IO_STATUS_AIO_ERROR;
7672 : }
7673 :
7674 0 : bdev_io->internal.error.aio_result = aio_result;
7675 :
7676 0 : spdk_bdev_io_complete(bdev_io, status);
7677 0 : }
7678 :
7679 : void
7680 0 : spdk_bdev_io_get_aio_status(const struct spdk_bdev_io *bdev_io, int *aio_result)
7681 : {
7682 0 : assert(aio_result != NULL);
7683 :
7684 0 : if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_AIO_ERROR) {
7685 0 : *aio_result = bdev_io->internal.error.aio_result;
7686 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
7687 0 : *aio_result = 0;
7688 0 : } else {
7689 0 : *aio_result = -EIO;
7690 : }
7691 0 : }
7692 :
7693 : void
7694 0 : spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, uint32_t cdw0, int sct, int sc)
7695 : {
7696 : enum spdk_bdev_io_status status;
7697 :
7698 0 : if (spdk_likely(sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS)) {
7699 0 : status = SPDK_BDEV_IO_STATUS_SUCCESS;
7700 0 : } else if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_ABORTED_BY_REQUEST) {
7701 0 : status = SPDK_BDEV_IO_STATUS_ABORTED;
7702 0 : } else {
7703 0 : status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
7704 : }
7705 :
7706 0 : bdev_io->internal.error.nvme.cdw0 = cdw0;
7707 0 : bdev_io->internal.error.nvme.sct = sct;
7708 0 : bdev_io->internal.error.nvme.sc = sc;
7709 :
7710 0 : spdk_bdev_io_complete(bdev_io, status);
7711 0 : }
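/* Illustrative sketch, not part of bdev.c: an NVMe-backed module translating
 * a controller completion into a bdev completion. It assumes a struct
 * spdk_nvme_cpl from the NVMe driver; the field names follow spdk/nvme_spec.h.
 */
static void
example_nvme_io_done(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
{
	spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0,
					  cpl->status.sct, cpl->status.sc);
}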
7712 :
7713 : void
7714 0 : spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, uint32_t *cdw0, int *sct, int *sc)
7715 : {
7716 0 : assert(sct != NULL);
7717 0 : assert(sc != NULL);
7718 0 : assert(cdw0 != NULL);
7719 :
7720 0 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT)) {
7721 0 : *sct = SPDK_NVME_SCT_GENERIC;
7722 0 : *sc = SPDK_NVME_SC_SUCCESS;
7723 0 : if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
7724 0 : *cdw0 = 0;
7725 0 : } else {
7726 0 : *cdw0 = 1U;
7727 : }
7728 0 : return;
7729 : }
7730 :
7731 0 : if (spdk_likely(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS)) {
7732 0 : *sct = SPDK_NVME_SCT_GENERIC;
7733 0 : *sc = SPDK_NVME_SC_SUCCESS;
7734 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
7735 0 : *sct = bdev_io->internal.error.nvme.sct;
7736 0 : *sc = bdev_io->internal.error.nvme.sc;
7737 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED) {
7738 0 : *sct = SPDK_NVME_SCT_GENERIC;
7739 0 : *sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
7740 0 : } else {
7741 0 : *sct = SPDK_NVME_SCT_GENERIC;
7742 0 : *sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
7743 : }
7744 :
7745 0 : *cdw0 = bdev_io->internal.error.nvme.cdw0;
7746 0 : }
7747 :
7748 : void
7749 0 : spdk_bdev_io_get_nvme_fused_status(const struct spdk_bdev_io *bdev_io, uint32_t *cdw0,
7750 : int *first_sct, int *first_sc, int *second_sct, int *second_sc)
7751 : {
7752 0 : assert(first_sct != NULL);
7753 0 : assert(first_sc != NULL);
7754 0 : assert(second_sct != NULL);
7755 0 : assert(second_sc != NULL);
7756 0 : assert(cdw0 != NULL);
7757 :
7758 0 : if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
7759 0 : if (bdev_io->internal.error.nvme.sct == SPDK_NVME_SCT_MEDIA_ERROR &&
7760 0 : bdev_io->internal.error.nvme.sc == SPDK_NVME_SC_COMPARE_FAILURE) {
7761 0 : *first_sct = bdev_io->internal.error.nvme.sct;
7762 0 : *first_sc = bdev_io->internal.error.nvme.sc;
7763 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7764 0 : *second_sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
7765 0 : } else {
7766 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7767 0 : *first_sc = SPDK_NVME_SC_SUCCESS;
7768 0 : *second_sct = bdev_io->internal.error.nvme.sct;
7769 0 : *second_sc = bdev_io->internal.error.nvme.sc;
7770 : }
7771 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED) {
7772 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7773 0 : *first_sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
7774 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7775 0 : *second_sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
7776 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
7777 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7778 0 : *first_sc = SPDK_NVME_SC_SUCCESS;
7779 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7780 0 : *second_sc = SPDK_NVME_SC_SUCCESS;
7781 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED) {
7782 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7783 0 : *first_sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
7784 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7785 0 : *second_sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
7786 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_MISCOMPARE) {
7787 0 : *first_sct = SPDK_NVME_SCT_MEDIA_ERROR;
7788 0 : *first_sc = SPDK_NVME_SC_COMPARE_FAILURE;
7789 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7790 0 : *second_sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
7791 0 : } else {
7792 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7793 0 : *first_sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
7794 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7795 0 : *second_sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
7796 : }
7797 :
7798 0 : *cdw0 = bdev_io->internal.error.nvme.cdw0;
7799 0 : }
7800 :
7801 : void
7802 0 : spdk_bdev_io_complete_base_io_status(struct spdk_bdev_io *bdev_io,
7803 : const struct spdk_bdev_io *base_io)
7804 : {
7805 0 : switch (base_io->internal.status) {
7806 : case SPDK_BDEV_IO_STATUS_NVME_ERROR:
7807 0 : spdk_bdev_io_complete_nvme_status(bdev_io,
7808 0 : base_io->internal.error.nvme.cdw0,
7809 0 : base_io->internal.error.nvme.sct,
7810 0 : base_io->internal.error.nvme.sc);
7811 0 : break;
7812 : case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
7813 0 : spdk_bdev_io_complete_scsi_status(bdev_io,
7814 0 : base_io->internal.error.scsi.sc,
7815 0 : base_io->internal.error.scsi.sk,
7816 0 : base_io->internal.error.scsi.asc,
7817 0 : base_io->internal.error.scsi.ascq);
7818 0 : break;
7819 : case SPDK_BDEV_IO_STATUS_AIO_ERROR:
7820 0 : spdk_bdev_io_complete_aio_status(bdev_io, base_io->internal.error.aio_result);
7821 0 : break;
7822 : default:
7823 0 : spdk_bdev_io_complete(bdev_io, base_io->internal.status);
7824 0 : break;
7825 : }
7826 0 : }
7827 :
7828 : struct spdk_thread *
7829 660 : spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io)
7830 : {
7831 660 : return spdk_io_channel_get_thread(bdev_io->internal.ch->channel);
7832 : }
7833 :
7834 : struct spdk_io_channel *
7835 70 : spdk_bdev_io_get_io_channel(struct spdk_bdev_io *bdev_io)
7836 : {
7837 70 : return bdev_io->internal.ch->channel;
7838 : }
7839 :
7840 : static int
7841 127 : bdev_register(struct spdk_bdev *bdev)
7842 : {
7843 : char *bdev_name;
7844 : char uuid[SPDK_UUID_STRING_LEN];
7845 : struct spdk_iobuf_opts iobuf_opts;
7846 : int ret;
7847 :
7848 127 : assert(bdev->module != NULL);
7849 :
7850 127 : if (!bdev->name) {
7851 0 : SPDK_ERRLOG("Bdev name is NULL\n");
7852 0 : return -EINVAL;
7853 : }
7854 :
7855 127 : if (!strlen(bdev->name)) {
7856 0 : SPDK_ERRLOG("Bdev name must not be an empty string\n");
7857 0 : return -EINVAL;
7858 : }
7859 :
7860 : /* Users often register their own I/O devices using the bdev name. In
7861 : * order to avoid conflicts, prepend bdev_. */
7862 127 : bdev_name = spdk_sprintf_alloc("bdev_%s", bdev->name);
7863 127 : if (!bdev_name) {
7864 0 : SPDK_ERRLOG("Unable to allocate memory for internal bdev name.\n");
7865 0 : return -ENOMEM;
7866 : }
7867 :
7868 127 : bdev->internal.stat = bdev_alloc_io_stat(true);
7869 127 : if (!bdev->internal.stat) {
7870 0 : SPDK_ERRLOG("Unable to allocate I/O statistics structure.\n");
7871 0 : free(bdev_name);
7872 0 : return -ENOMEM;
7873 : }
7874 :
7875 127 : bdev->internal.status = SPDK_BDEV_STATUS_READY;
7876 127 : bdev->internal.measured_queue_depth = UINT64_MAX;
7877 127 : bdev->internal.claim_type = SPDK_BDEV_CLAIM_NONE;
7878 127 : memset(&bdev->internal.claim, 0, sizeof(bdev->internal.claim));
7879 127 : bdev->internal.qd_poller = NULL;
7880 127 : bdev->internal.qos = NULL;
7881 :
7882 127 : TAILQ_INIT(&bdev->internal.open_descs);
7883 127 : TAILQ_INIT(&bdev->internal.locked_ranges);
7884 127 : TAILQ_INIT(&bdev->internal.pending_locked_ranges);
7885 127 : TAILQ_INIT(&bdev->aliases);
7886 :
7887 : /* UUID may be specified by the user or defined by bdev itself.
7888 : * Otherwise it will be generated here, so this field will never be empty. */
7889 127 : if (spdk_uuid_is_null(&bdev->uuid)) {
7890 42 : spdk_uuid_generate(&bdev->uuid);
7891 42 : }
7892 :
7893 : /* Add the UUID alias only if it's different than the name */
7894 127 : spdk_uuid_fmt_lower(uuid, sizeof(uuid), &bdev->uuid);
7895 127 : if (strcmp(bdev->name, uuid) != 0) {
7896 126 : ret = spdk_bdev_alias_add(bdev, uuid);
7897 126 : if (ret != 0) {
7898 2 : SPDK_ERRLOG("Unable to add uuid:%s alias for bdev %s\n", uuid, bdev->name);
7899 2 : bdev_free_io_stat(bdev->internal.stat);
7900 2 : free(bdev_name);
7901 2 : return ret;
7902 : }
7903 124 : }
7904 :
7905 125 : spdk_iobuf_get_opts(&iobuf_opts, sizeof(iobuf_opts));
7906 125 : if (spdk_bdev_get_buf_align(bdev) > 1) {
7907 0 : bdev->max_rw_size = spdk_min(bdev->max_rw_size ? bdev->max_rw_size : UINT32_MAX,
7908 : iobuf_opts.large_bufsize / bdev->blocklen);
7909 0 : }
7910 :
7911 : /* If the user didn't specify a write unit size, set it to one. */
7912 125 : if (bdev->write_unit_size == 0) {
7913 121 : bdev->write_unit_size = 1;
7914 121 : }
7915 :
7916 : /* Set ACWU value to the write unit size if bdev module did not set it (does not support it natively) */
7917 125 : if (bdev->acwu == 0) {
7918 121 : bdev->acwu = bdev->write_unit_size;
7919 121 : }
7920 :
7921 125 : if (bdev->phys_blocklen == 0) {
7922 121 : bdev->phys_blocklen = spdk_bdev_get_data_block_size(bdev);
7923 121 : }
7924 :
7925 125 : if (!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY)) {
7926 0 : bdev->max_copy = bdev_get_max_write(bdev, iobuf_opts.large_bufsize);
7927 0 : }
7928 :
7929 125 : if (!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
7930 0 : bdev->max_write_zeroes = bdev_get_max_write(bdev, ZERO_BUFFER_SIZE);
7931 0 : }
7932 :
7933 125 : bdev->internal.reset_in_progress = NULL;
7934 125 : bdev->internal.qd_poll_in_progress = false;
7935 125 : bdev->internal.period = 0;
7936 125 : bdev->internal.new_period = 0;
7937 125 : bdev->internal.trace_id = spdk_trace_register_owner(OWNER_TYPE_BDEV, bdev_name);
7938 :
7939 : /*
7940 : * Initialize the spinlock before registering the IO device, because the spinlock
7941 : * is used in bdev_channel_create.
7942 : */
7943 125 : spdk_spin_init(&bdev->internal.spinlock);
7944 :
7945 250 : spdk_io_device_register(__bdev_to_io_dev(bdev),
7946 : bdev_channel_create, bdev_channel_destroy,
7947 : sizeof(struct spdk_bdev_channel),
7948 125 : bdev_name);
7949 :
7950 : /*
7951 : * Register bdev name only after the bdev object is ready.
7952 : * After bdev_name_add returns, it is possible for other threads to start using the bdev,
7953 : * create IO channels...
7954 : */
7955 125 : ret = bdev_name_add(&bdev->internal.bdev_name, bdev, bdev->name);
7956 125 : if (ret != 0) {
7957 0 : spdk_io_device_unregister(__bdev_to_io_dev(bdev), NULL);
7958 0 : bdev_free_io_stat(bdev->internal.stat);
7959 0 : spdk_spin_destroy(&bdev->internal.spinlock);
7960 0 : free(bdev_name);
7961 0 : return ret;
7962 : }
7963 :
7964 125 : free(bdev_name);
7965 :
7966 125 : SPDK_DEBUGLOG(bdev, "Inserting bdev %s into list\n", bdev->name);
7967 125 : TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, internal.link);
7968 :
7969 125 : return 0;
7970 127 : }
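/* Illustrative sketch, not part of bdev.c: the minimum a bdev module fills in
 * before calling spdk_bdev_register(), which in turn drives bdev_register()
 * above. "example_fn_table" and "example_if" stand for the module's function
 * table (destruct, submit_request, io_type_supported, get_io_channel) and
 * spdk_bdev_module object, assumed to be defined elsewhere; all "example_"
 * names are hypothetical. */
extern const struct spdk_bdev_fn_table example_fn_table;
extern struct spdk_bdev_module example_if;

struct example_disk {
	struct spdk_bdev	bdev;
};

static int
example_register_disk(struct example_disk *disk)
{
	disk->bdev.name = strdup("ExampleDisk0");
	if (disk->bdev.name == NULL) {
		return -ENOMEM;
	}
	disk->bdev.product_name = "Example Disk";
	disk->bdev.blocklen = 512;
	disk->bdev.blockcnt = 1024 * 1024;
	disk->bdev.ctxt = disk;
	disk->bdev.fn_table = &example_fn_table;
	disk->bdev.module = &example_if;

	/* Registration fails (e.g. -EEXIST) if the name or the UUID alias
	 * already exists; the caller owns the cleanup in that case. */
	return spdk_bdev_register(&disk->bdev);
}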
7971 :
7972 : static void
7973 126 : bdev_destroy_cb(void *io_device)
7974 : {
7975 : int rc;
7976 : struct spdk_bdev *bdev;
7977 : spdk_bdev_unregister_cb cb_fn;
7978 : void *cb_arg;
7979 :
7980 126 : bdev = __bdev_from_io_dev(io_device);
7981 :
7982 126 : if (bdev->internal.unregister_td != spdk_get_thread()) {
7983 1 : spdk_thread_send_msg(bdev->internal.unregister_td, bdev_destroy_cb, io_device);
7984 1 : return;
7985 : }
7986 :
7987 125 : cb_fn = bdev->internal.unregister_cb;
7988 125 : cb_arg = bdev->internal.unregister_ctx;
7989 :
7990 125 : spdk_spin_destroy(&bdev->internal.spinlock);
7991 125 : free(bdev->internal.qos);
7992 125 : bdev_free_io_stat(bdev->internal.stat);
7993 125 : spdk_trace_unregister_owner(bdev->internal.trace_id);
7994 :
7995 125 : rc = bdev->fn_table->destruct(bdev->ctxt);
7996 125 : if (rc < 0) {
7997 0 : SPDK_ERRLOG("destruct failed\n");
7998 0 : }
7999 125 : if (rc <= 0 && cb_fn != NULL) {
8000 10 : cb_fn(cb_arg, rc);
8001 10 : }
8002 126 : }
8003 :
8004 : void
8005 2 : spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno)
8006 : {
8007 2 : if (bdev->internal.unregister_cb != NULL) {
8008 0 : bdev->internal.unregister_cb(bdev->internal.unregister_ctx, bdeverrno);
8009 0 : }
8010 2 : }
8011 :
8012 : static void
8013 19 : _remove_notify(void *arg)
8014 : {
8015 19 : struct spdk_bdev_desc *desc = arg;
8016 :
8017 19 : _event_notify(desc, SPDK_BDEV_EVENT_REMOVE);
8018 19 : }
8019 :
8020 : /* returns: 0 - bdev removed and ready to be destructed.
8021 : * -EBUSY - bdev can't be destructed yet. */
8022 : static int
8023 140 : bdev_unregister_unsafe(struct spdk_bdev *bdev)
8024 : {
8025 : struct spdk_bdev_desc *desc, *tmp;
8026 140 : int rc = 0;
8027 : char uuid[SPDK_UUID_STRING_LEN];
8028 :
8029 140 : assert(spdk_spin_held(&g_bdev_mgr.spinlock));
8030 140 : assert(spdk_spin_held(&bdev->internal.spinlock));
8031 :
8032 : /* Notify each descriptor about hotremoval */
8033 159 : TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) {
8034 19 : rc = -EBUSY;
8035 : /*
8036 : * Defer invocation of the event_cb to a separate message that will
8037 : * run later on its thread. This ensures this context unwinds and
8038 : * we don't recursively unregister this bdev again if the event_cb
8039 : * immediately closes its descriptor.
8040 : */
8041 19 : event_notify(desc, _remove_notify);
8042 19 : }
8043 :
8044 : /* If there are no descriptors, proceed removing the bdev */
8045 140 : if (rc == 0) {
8046 125 : TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link);
8047 125 : SPDK_DEBUGLOG(bdev, "Removing bdev %s from list done\n", bdev->name);
8048 :
8049 : /* Delete the name and the UUID alias */
8050 125 : spdk_uuid_fmt_lower(uuid, sizeof(uuid), &bdev->uuid);
8051 125 : bdev_name_del_unsafe(&bdev->internal.bdev_name);
8052 125 : bdev_alias_del(bdev, uuid, bdev_name_del_unsafe);
8053 :
8054 125 : spdk_notify_send("bdev_unregister", spdk_bdev_get_name(bdev));
8055 :
8056 125 : if (bdev->internal.reset_in_progress != NULL) {
8057 : /* If reset is in progress, let the completion callback for reset
8058 : * unregister the bdev.
8059 : */
8060 1 : rc = -EBUSY;
8061 1 : }
8062 125 : }
8063 :
8064 140 : return rc;
8065 : }
8066 :
8067 : static void
8068 4 : bdev_unregister_abort_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
8069 : struct spdk_io_channel *io_ch, void *_ctx)
8070 : {
8071 4 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
8072 :
8073 4 : bdev_channel_abort_queued_ios(bdev_ch);
8074 4 : spdk_bdev_for_each_channel_continue(i, 0);
8075 4 : }
8076 :
8077 : static void
8078 125 : bdev_unregister(struct spdk_bdev *bdev, void *_ctx, int status)
8079 : {
8080 : int rc;
8081 :
8082 125 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8083 125 : spdk_spin_lock(&bdev->internal.spinlock);
8084 : /*
8085 : * Set the status to REMOVING only after aborting the channels has completed.
8086 : * Otherwise, the last spdk_bdev_close() may call spdk_io_device_unregister()
8087 : * while spdk_bdev_for_each_channel() is still executing, and
8088 : * spdk_io_device_unregister() may fail.
8089 : */
8090 125 : bdev->internal.status = SPDK_BDEV_STATUS_REMOVING;
8091 125 : rc = bdev_unregister_unsafe(bdev);
8092 125 : spdk_spin_unlock(&bdev->internal.spinlock);
8093 125 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8094 :
8095 125 : if (rc == 0) {
8096 109 : spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
8097 109 : }
8098 125 : }
8099 :
8100 : void
8101 132 : spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
8102 : {
8103 : struct spdk_thread *thread;
8104 :
8105 132 : SPDK_DEBUGLOG(bdev, "Removing bdev %s from list\n", bdev->name);
8106 :
8107 132 : thread = spdk_get_thread();
8108 132 : if (!thread) {
8109 : /* The user called this from a non-SPDK thread. */
8110 0 : if (cb_fn != NULL) {
8111 0 : cb_fn(cb_arg, -ENOTSUP);
8112 0 : }
8113 0 : return;
8114 : }
8115 :
8116 132 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8117 132 : if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING ||
8118 132 : bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
8119 7 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8120 7 : if (cb_fn) {
8121 0 : cb_fn(cb_arg, -EBUSY);
8122 0 : }
8123 7 : return;
8124 : }
8125 :
8126 125 : spdk_spin_lock(&bdev->internal.spinlock);
8127 125 : bdev->internal.status = SPDK_BDEV_STATUS_UNREGISTERING;
8128 125 : bdev->internal.unregister_cb = cb_fn;
8129 125 : bdev->internal.unregister_ctx = cb_arg;
8130 125 : bdev->internal.unregister_td = thread;
8131 125 : spdk_spin_unlock(&bdev->internal.spinlock);
8132 125 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8133 :
8134 125 : spdk_bdev_set_qd_sampling_period(bdev, 0);
8135 :
8136 125 : spdk_bdev_for_each_channel(bdev, bdev_unregister_abort_channel, bdev,
8137 : bdev_unregister);
8138 132 : }
8139 :
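/*
 * Illustrative caller-side sketch, not part of bdev.c or its coverage data:
 * removing a bdev with a completion callback. Assumes spdk/bdev.h, spdk/log.h
 * and an SPDK thread; the example_* names are hypothetical.
 */
#include "spdk/bdev.h"
#include "spdk/log.h"

static void
example_unregister_done(void *cb_arg, int rc)
{
	/* rc is 0 on success, or the error reported by unregister/destruct. */
	SPDK_NOTICELOG("bdev unregister completed, rc=%d\n", rc);
}

static void
example_delete_bdev(struct spdk_bdev *bdev)
{
	/* Open descriptors receive SPDK_BDEV_EVENT_REMOVE; the callback fires
	 * once the last descriptor is closed and the bdev has been destructed.
	 */
	spdk_bdev_unregister(bdev, example_unregister_done, NULL);
}
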
8140 : int
8141 4 : spdk_bdev_unregister_by_name(const char *bdev_name, struct spdk_bdev_module *module,
8142 : spdk_bdev_unregister_cb cb_fn, void *cb_arg)
8143 : {
8144 : struct spdk_bdev_desc *desc;
8145 : struct spdk_bdev *bdev;
8146 : int rc;
8147 :
8148 4 : rc = spdk_bdev_open_ext(bdev_name, false, _tmp_bdev_event_cb, NULL, &desc);
8149 4 : if (rc != 0) {
8150 1 : SPDK_ERRLOG("Failed to open bdev with name: %s\n", bdev_name);
8151 1 : return rc;
8152 : }
8153 :
8154 3 : bdev = spdk_bdev_desc_get_bdev(desc);
8155 :
8156 3 : if (bdev->module != module) {
8157 1 : spdk_bdev_close(desc);
8158 1 : SPDK_ERRLOG("Bdev %s was not registered by the specified module.\n",
8159 : bdev_name);
8160 1 : return -ENODEV;
8161 : }
8162 :
8163 2 : spdk_bdev_unregister(bdev, cb_fn, cb_arg);
8164 :
8165 2 : spdk_bdev_close(desc);
8166 :
8167 2 : return 0;
8168 4 : }
8169 :
8170 : static int
8171 258 : bdev_start_qos(struct spdk_bdev *bdev)
8172 : {
8173 : struct set_qos_limit_ctx *ctx;
8174 :
8175 : /* Enable QoS */
8176 258 : if (bdev->internal.qos && bdev->internal.qos->thread == NULL) {
8177 2 : ctx = calloc(1, sizeof(*ctx));
8178 2 : if (ctx == NULL) {
8179 0 : SPDK_ERRLOG("Failed to allocate memory for QoS context\n");
8180 0 : return -ENOMEM;
8181 : }
8182 2 : ctx->bdev = bdev;
8183 2 : spdk_bdev_for_each_channel(bdev, bdev_enable_qos_msg, ctx, bdev_enable_qos_done);
8184 2 : }
8185 :
8186 258 : return 0;
8187 258 : }
8188 :
8189 : static void
8190 24 : log_already_claimed(enum spdk_log_level level, const int line, const char *func, const char *detail,
8191 : struct spdk_bdev *bdev)
8192 : {
8193 : enum spdk_bdev_claim_type type;
8194 : const char *typename, *modname;
8195 : extern struct spdk_log_flag SPDK_LOG_bdev;
8196 :
8197 24 : assert(spdk_spin_held(&bdev->internal.spinlock));
8198 :
8199 24 : if (level >= SPDK_LOG_INFO && !SPDK_LOG_bdev.enabled) {
8200 0 : return;
8201 : }
8202 :
8203 24 : type = bdev->internal.claim_type;
8204 24 : typename = spdk_bdev_claim_get_name(type);
8205 :
8206 24 : if (type == SPDK_BDEV_CLAIM_EXCL_WRITE) {
8207 6 : modname = bdev->internal.claim.v1.module->name;
8208 12 : spdk_log(level, __FILE__, line, func, "bdev %s %s: type %s by module %s\n",
8209 6 : bdev->name, detail, typename, modname);
8210 6 : return;
8211 : }
8212 :
8213 18 : if (claim_type_is_v2(type)) {
8214 : struct spdk_bdev_module_claim *claim;
8215 :
8216 36 : TAILQ_FOREACH(claim, &bdev->internal.claim.v2.claims, link) {
8217 18 : modname = claim->module->name;
8218 36 : spdk_log(level, __FILE__, line, func, "bdev %s %s: type %s by module %s\n",
8219 18 : bdev->name, detail, typename, modname);
8220 18 : }
8221 18 : return;
8222 : }
8223 :
8224 0 : assert(false);
8225 24 : }
8226 :
8227 : static int
8228 267 : bdev_open(struct spdk_bdev *bdev, bool write, struct spdk_bdev_desc *desc)
8229 : {
8230 : struct spdk_thread *thread;
8231 267 : int rc = 0;
8232 :
8233 267 : thread = spdk_get_thread();
8234 267 : if (!thread) {
8235 0 : SPDK_ERRLOG("Cannot open bdev from non-SPDK thread.\n");
8236 0 : return -ENOTSUP;
8237 : }
8238 :
8239 267 : SPDK_DEBUGLOG(bdev, "Opening descriptor %p for bdev %s on thread %p\n", desc, bdev->name,
8240 : spdk_get_thread());
8241 :
8242 267 : desc->bdev = bdev;
8243 267 : desc->thread = thread;
8244 267 : desc->write = write;
8245 :
8246 267 : spdk_spin_lock(&bdev->internal.spinlock);
8247 267 : if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING ||
8248 267 : bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
8249 3 : spdk_spin_unlock(&bdev->internal.spinlock);
8250 3 : return -ENODEV;
8251 : }
8252 :
8253 264 : if (write && bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE) {
8254 6 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
8255 6 : spdk_spin_unlock(&bdev->internal.spinlock);
8256 6 : return -EPERM;
8257 : }
8258 :
8259 258 : rc = bdev_start_qos(bdev);
8260 258 : if (rc != 0) {
8261 0 : SPDK_ERRLOG("Failed to start QoS on bdev %s\n", bdev->name);
8262 0 : spdk_spin_unlock(&bdev->internal.spinlock);
8263 0 : return rc;
8264 : }
8265 :
8266 258 : TAILQ_INSERT_TAIL(&bdev->internal.open_descs, desc, link);
8267 :
8268 258 : spdk_spin_unlock(&bdev->internal.spinlock);
8269 :
8270 258 : return 0;
8271 267 : }
8272 :
8273 : static void
8274 268 : bdev_open_opts_get_defaults(struct spdk_bdev_open_opts *opts, size_t opts_size)
8275 : {
8276 268 : if (!opts) {
8277 0 : SPDK_ERRLOG("opts should not be NULL.\n");
8278 0 : return;
8279 : }
8280 :
8281 268 : if (!opts_size) {
8282 0 : SPDK_ERRLOG("opts_size should not be zero.\n");
8283 0 : return;
8284 : }
8285 :
8286 268 : memset(opts, 0, opts_size);
8287 268 : opts->size = opts_size;
8288 :
8289 : #define FIELD_OK(field) \
8290 : offsetof(struct spdk_bdev_open_opts, field) + sizeof(opts->field) <= opts_size
8291 :
8292 : #define SET_FIELD(field, value) \
8293 : if (FIELD_OK(field)) { \
8294 : opts->field = value; \
8295 : } \
8296 :
8297 268 : SET_FIELD(hide_metadata, false);
8298 :
8299 : #undef FIELD_OK
8300 : #undef SET_FIELD
8301 268 : }
8302 :
8303 : static void
8304 2 : bdev_open_opts_copy(struct spdk_bdev_open_opts *opts,
8305 : const struct spdk_bdev_open_opts *opts_src, size_t opts_size)
8306 : {
8307 2 : assert(opts);
8308 2 : assert(opts_src);
8309 :
8310 : #define SET_FIELD(field) \
8311 : if (offsetof(struct spdk_bdev_open_opts, field) + sizeof(opts->field) <= opts_size) { \
8312 : opts->field = opts_src->field; \
8313 : } \
8314 :
8315 2 : SET_FIELD(hide_metadata);
8316 :
8317 2 : opts->size = opts_src->size;
8318 :
8319 : /* We should not remove this statement, but need to update the assert statement
8320 : * if we add a new field, and also add a corresponding SET_FIELD statement.
8321 : */
8322 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_open_opts) == 16, "Incorrect size");
8323 :
8324 : #undef SET_FIELD
8325 2 : }
8326 :
8327 : void
8328 1 : spdk_bdev_open_opts_init(struct spdk_bdev_open_opts *opts, size_t opts_size)
8329 : {
8330 : struct spdk_bdev_open_opts opts_local;
8331 :
8332 1 : bdev_open_opts_get_defaults(&opts_local, sizeof(opts_local));
8333 1 : bdev_open_opts_copy(opts, &opts_local, opts_size);
8334 1 : }
8335 :
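/*
 * Illustrative sketch, not part of bdev.c: opening a bdev with open options.
 * hide_metadata is the only option handled above; the example_* names and the
 * empty event callback are hypothetical. Assumes spdk/bdev.h.
 */
#include "spdk/bdev.h"

static void
example_md_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	/* Handle SPDK_BDEV_EVENT_REMOVE etc. in real code. */
}

static int
example_open_hide_metadata(const char *name, struct spdk_bdev_desc **desc)
{
	struct spdk_bdev_open_opts opts;

	spdk_bdev_open_opts_init(&opts, sizeof(opts));
	opts.hide_metadata = true;	/* rejected for separate-metadata bdevs, see bdev_desc_alloc below */

	return spdk_bdev_open_ext_v2(name, false, example_md_event_cb, NULL, &opts, desc);
}
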
8336 : static int
8337 267 : bdev_desc_alloc(struct spdk_bdev *bdev, spdk_bdev_event_cb_t event_cb, void *event_ctx,
8338 : struct spdk_bdev_open_opts *user_opts, struct spdk_bdev_desc **_desc)
8339 : {
8340 : struct spdk_bdev_desc *desc;
8341 : struct spdk_bdev_open_opts opts;
8342 : unsigned int i;
8343 :
8344 267 : bdev_open_opts_get_defaults(&opts, sizeof(opts));
8345 267 : if (user_opts != NULL) {
8346 1 : bdev_open_opts_copy(&opts, user_opts, user_opts->size);
8347 1 : }
8348 :
8349 267 : desc = calloc(1, sizeof(*desc));
8350 267 : if (desc == NULL) {
8351 0 : SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n");
8352 0 : return -ENOMEM;
8353 : }
8354 :
8355 267 : desc->opts = opts;
8356 :
8357 267 : TAILQ_INIT(&desc->pending_media_events);
8358 267 : TAILQ_INIT(&desc->free_media_events);
8359 :
8360 267 : desc->memory_domains_supported = spdk_bdev_get_memory_domains(bdev, NULL, 0) > 0;
8361 267 : desc->callback.event_fn = event_cb;
8362 267 : desc->callback.ctx = event_ctx;
8363 267 : spdk_spin_init(&desc->spinlock);
8364 :
8365 267 : if (desc->opts.hide_metadata) {
8366 1 : if (spdk_bdev_is_md_separate(bdev)) {
8367 0 : SPDK_ERRLOG("hide_metadata option is not supported with separate metadata.\n");
8368 0 : bdev_desc_free(desc);
8369 0 : return -EINVAL;
8370 : }
8371 1 : }
8372 :
8373 267 : if (bdev->media_events) {
8374 0 : desc->media_events_buffer = calloc(MEDIA_EVENT_POOL_SIZE,
8375 : sizeof(*desc->media_events_buffer));
8376 0 : if (desc->media_events_buffer == NULL) {
8377 0 : SPDK_ERRLOG("Failed to initialize media event pool\n");
8378 0 : bdev_desc_free(desc);
8379 0 : return -ENOMEM;
8380 : }
8381 :
8382 0 : for (i = 0; i < MEDIA_EVENT_POOL_SIZE; ++i) {
8383 0 : TAILQ_INSERT_TAIL(&desc->free_media_events,
8384 : &desc->media_events_buffer[i], tailq);
8385 0 : }
8386 0 : }
8387 :
8388 267 : if (bdev->fn_table->accel_sequence_supported != NULL) {
8389 0 : for (i = 0; i < SPDK_BDEV_NUM_IO_TYPES; ++i) {
8390 0 : desc->accel_sequence_supported[i] =
8391 0 : bdev->fn_table->accel_sequence_supported(bdev->ctxt,
8392 0 : (enum spdk_bdev_io_type)i);
8393 0 : }
8394 0 : }
8395 :
8396 267 : *_desc = desc;
8397 :
8398 267 : return 0;
8399 267 : }
8400 :
8401 : static int
8402 130 : bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
8403 : void *event_ctx, struct spdk_bdev_open_opts *opts,
8404 : struct spdk_bdev_desc **_desc)
8405 : {
8406 : struct spdk_bdev_desc *desc;
8407 : struct spdk_bdev *bdev;
8408 : int rc;
8409 :
8410 130 : bdev = bdev_get_by_name(bdev_name);
8411 :
8412 130 : if (bdev == NULL) {
8413 1 : SPDK_NOTICELOG("Currently unable to find bdev with name: %s\n", bdev_name);
8414 1 : return -ENODEV;
8415 : }
8416 :
8417 129 : rc = bdev_desc_alloc(bdev, event_cb, event_ctx, opts, &desc);
8418 129 : if (rc != 0) {
8419 0 : return rc;
8420 : }
8421 :
8422 129 : rc = bdev_open(bdev, write, desc);
8423 129 : if (rc != 0) {
8424 7 : bdev_desc_free(desc);
8425 7 : desc = NULL;
8426 7 : }
8427 :
8428 129 : *_desc = desc;
8429 :
8430 129 : return rc;
8431 130 : }
8432 :
8433 : int
8434 132 : spdk_bdev_open_ext_v2(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
8435 : void *event_ctx, struct spdk_bdev_open_opts *opts,
8436 : struct spdk_bdev_desc **_desc)
8437 : {
8438 : int rc;
8439 :
8440 132 : if (event_cb == NULL) {
8441 2 : SPDK_ERRLOG("Missing event callback function\n");
8442 2 : return -EINVAL;
8443 : }
8444 :
8445 130 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8446 130 : rc = bdev_open_ext(bdev_name, write, event_cb, event_ctx, opts, _desc);
8447 130 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8448 :
8449 130 : return rc;
8450 132 : }
8451 :
8452 : int
8453 130 : spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
8454 : void *event_ctx, struct spdk_bdev_desc **_desc)
8455 : {
8456 130 : return spdk_bdev_open_ext_v2(bdev_name, write, event_cb, event_ctx, NULL, _desc);
8457 : }
8458 :
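/*
 * Illustrative sketch, not part of bdev.c: the typical open/hotremove/close
 * flow around spdk_bdev_open_ext(). Assumes spdk/bdev.h, spdk/log.h and an
 * SPDK thread; example_* names are hypothetical.
 */
#include "spdk/bdev.h"
#include "spdk/log.h"

static void
example_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_bdev_desc **desc = event_ctx;

	if (type == SPDK_BDEV_EVENT_REMOVE && *desc != NULL) {
		/* Runs on the opening thread; closing here lets the unregister finish. */
		spdk_bdev_close(*desc);
		*desc = NULL;
	}
}

static int
example_open_bdev(const char *name, struct spdk_bdev_desc **desc)
{
	int rc;

	rc = spdk_bdev_open_ext(name, true /* write */, example_bdev_event_cb, desc, desc);
	if (rc == 0) {
		SPDK_NOTICELOG("opened %s\n",
			       spdk_bdev_get_name(spdk_bdev_desc_get_bdev(*desc)));
	}
	return rc;
}
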
8459 : struct spdk_bdev_open_async_ctx {
8460 : char *bdev_name;
8461 : spdk_bdev_event_cb_t event_cb;
8462 : void *event_ctx;
8463 : bool write;
8464 : int rc;
8465 : spdk_bdev_open_async_cb_t cb_fn;
8466 : void *cb_arg;
8467 : struct spdk_bdev_desc *desc;
8468 : struct spdk_bdev_open_async_opts opts;
8469 : uint64_t start_ticks;
8470 : struct spdk_thread *orig_thread;
8471 : struct spdk_poller *poller;
8472 : TAILQ_ENTRY(spdk_bdev_open_async_ctx) tailq;
8473 : };
8474 :
8475 : static void
8476 0 : bdev_open_async_done(void *arg)
8477 : {
8478 0 : struct spdk_bdev_open_async_ctx *ctx = arg;
8479 :
8480 0 : ctx->cb_fn(ctx->desc, ctx->rc, ctx->cb_arg);
8481 :
8482 0 : free(ctx->bdev_name);
8483 0 : free(ctx);
8484 0 : }
8485 :
8486 : static void
8487 0 : bdev_open_async_cancel(void *arg)
8488 : {
8489 0 : struct spdk_bdev_open_async_ctx *ctx = arg;
8490 :
8491 0 : assert(ctx->rc == -ESHUTDOWN);
8492 :
8493 0 : spdk_poller_unregister(&ctx->poller);
8494 :
8495 0 : bdev_open_async_done(ctx);
8496 0 : }
8497 :
8498 : /* This is called when the bdev library is being shut down. */
8499 : static void
8500 65 : bdev_open_async_fini(void)
8501 : {
8502 : struct spdk_bdev_open_async_ctx *ctx, *tmp_ctx;
8503 :
8504 65 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8505 65 : TAILQ_FOREACH_SAFE(ctx, &g_bdev_mgr.async_bdev_opens, tailq, tmp_ctx) {
8506 0 : TAILQ_REMOVE(&g_bdev_mgr.async_bdev_opens, ctx, tailq);
8507 : /*
8508 : * We have to move to ctx->orig_thread to unregister ctx->poller.
8509 : * However, there is a chance that ctx->poller runs before the
8510 : * message is executed, which could result in bdev_open_async_done()
8511 : * being called twice. To avoid such a race condition, set ctx->rc to
8512 : * -ESHUTDOWN.
8513 : */
8514 0 : ctx->rc = -ESHUTDOWN;
8515 0 : spdk_thread_send_msg(ctx->orig_thread, bdev_open_async_cancel, ctx);
8516 0 : }
8517 65 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8518 65 : }
8519 :
8520 : static int bdev_open_async(void *arg);
8521 :
8522 : static void
8523 0 : _bdev_open_async(struct spdk_bdev_open_async_ctx *ctx)
8524 : {
8525 : uint64_t timeout_ticks;
8526 :
8527 0 : if (ctx->rc == -ESHUTDOWN) {
8528 : /* This context is being canceled. Do nothing. */
8529 0 : return;
8530 : }
8531 :
8532 0 : ctx->rc = bdev_open_ext(ctx->bdev_name, ctx->write, ctx->event_cb, ctx->event_ctx,
8533 0 : NULL, &ctx->desc);
8534 0 : if (ctx->rc == 0 || ctx->opts.timeout_ms == 0) {
8535 0 : goto exit;
8536 : }
8537 :
8538 0 : timeout_ticks = ctx->start_ticks + ctx->opts.timeout_ms * spdk_get_ticks_hz() / 1000ull;
8539 0 : if (spdk_get_ticks() >= timeout_ticks) {
8540 0 : SPDK_ERRLOG("Timed out while waiting for bdev '%s' to appear\n", ctx->bdev_name);
8541 0 : ctx->rc = -ETIMEDOUT;
8542 0 : goto exit;
8543 : }
8544 :
8545 0 : return;
8546 :
8547 : exit:
8548 0 : spdk_poller_unregister(&ctx->poller);
8549 0 : TAILQ_REMOVE(&g_bdev_mgr.async_bdev_opens, ctx, tailq);
8550 :
8551 : /* Completion callback is processed after stack unwinding. */
8552 0 : spdk_thread_send_msg(ctx->orig_thread, bdev_open_async_done, ctx);
8553 0 : }
8554 :
8555 : static int
8556 0 : bdev_open_async(void *arg)
8557 : {
8558 0 : struct spdk_bdev_open_async_ctx *ctx = arg;
8559 :
8560 0 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8561 :
8562 0 : _bdev_open_async(ctx);
8563 :
8564 0 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8565 :
8566 0 : return SPDK_POLLER_BUSY;
8567 : }
8568 :
8569 : static void
8570 0 : bdev_open_async_opts_copy(struct spdk_bdev_open_async_opts *opts,
8571 : struct spdk_bdev_open_async_opts *opts_src,
8572 : size_t size)
8573 : {
8574 0 : assert(opts);
8575 0 : assert(opts_src);
8576 :
8577 0 : opts->size = size;
8578 :
8579 : #define SET_FIELD(field) \
8580 : if (offsetof(struct spdk_bdev_open_async_opts, field) + sizeof(opts->field) <= size) { \
8581 : opts->field = opts_src->field; \
8582 : } \
8583 :
8584 0 : SET_FIELD(timeout_ms);
8585 :
8586 : /* Do not remove this statement. Update it whenever a new field is added,
8587 : * and remember to add a corresponding SET_FIELD statement for the new field. */
8588 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_open_async_opts) == 16, "Incorrect size");
8589 :
8590 : #undef SET_FIELD
8591 0 : }
8592 :
8593 : static void
8594 0 : bdev_open_async_opts_get_default(struct spdk_bdev_open_async_opts *opts, size_t size)
8595 : {
8596 0 : assert(opts);
8597 :
8598 0 : opts->size = size;
8599 :
8600 : #define SET_FIELD(field, value) \
8601 : if (offsetof(struct spdk_bdev_open_async_opts, field) + sizeof(opts->field) <= size) { \
8602 : opts->field = value; \
8603 : } \
8604 :
8605 0 : SET_FIELD(timeout_ms, 0);
8606 :
8607 : #undef SET_FIELD
8608 0 : }
8609 :
8610 : int
8611 0 : spdk_bdev_open_async(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
8612 : void *event_ctx, struct spdk_bdev_open_async_opts *opts,
8613 : spdk_bdev_open_async_cb_t open_cb, void *open_cb_arg)
8614 : {
8615 : struct spdk_bdev_open_async_ctx *ctx;
8616 :
8617 0 : if (event_cb == NULL) {
8618 0 : SPDK_ERRLOG("Missing event callback function\n");
8619 0 : return -EINVAL;
8620 : }
8621 :
8622 0 : if (open_cb == NULL) {
8623 0 : SPDK_ERRLOG("Missing open callback function\n");
8624 0 : return -EINVAL;
8625 : }
8626 :
8627 0 : if (opts != NULL && opts->size == 0) {
8628 0 : SPDK_ERRLOG("size in the options structure should not be zero\n");
8629 0 : return -EINVAL;
8630 : }
8631 :
8632 0 : ctx = calloc(1, sizeof(*ctx));
8633 0 : if (ctx == NULL) {
8634 0 : SPDK_ERRLOG("Failed to allocate open context\n");
8635 0 : return -ENOMEM;
8636 : }
8637 :
8638 0 : ctx->bdev_name = strdup(bdev_name);
8639 0 : if (ctx->bdev_name == NULL) {
8640 0 : SPDK_ERRLOG("Failed to duplicate bdev_name\n");
8641 0 : free(ctx);
8642 0 : return -ENOMEM;
8643 : }
8644 :
8645 0 : ctx->poller = SPDK_POLLER_REGISTER(bdev_open_async, ctx, 100 * 1000);
8646 0 : if (ctx->poller == NULL) {
8647 0 : SPDK_ERRLOG("Failed to register bdev_open_async poller\n");
8648 0 : free(ctx->bdev_name);
8649 0 : free(ctx);
8650 0 : return -ENOMEM;
8651 : }
8652 :
8653 0 : ctx->cb_fn = open_cb;
8654 0 : ctx->cb_arg = open_cb_arg;
8655 0 : ctx->write = write;
8656 0 : ctx->event_cb = event_cb;
8657 0 : ctx->event_ctx = event_ctx;
8658 0 : ctx->orig_thread = spdk_get_thread();
8659 0 : ctx->start_ticks = spdk_get_ticks();
8660 :
8661 0 : bdev_open_async_opts_get_default(&ctx->opts, sizeof(ctx->opts));
8662 0 : if (opts != NULL) {
8663 0 : bdev_open_async_opts_copy(&ctx->opts, opts, opts->size);
8664 0 : }
8665 :
8666 0 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8667 :
8668 0 : TAILQ_INSERT_TAIL(&g_bdev_mgr.async_bdev_opens, ctx, tailq);
8669 0 : _bdev_open_async(ctx);
8670 :
8671 0 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8672 :
8673 0 : return 0;
8674 0 : }
8675 :
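/*
 * Illustrative sketch, not part of bdev.c: waiting up to 10 seconds for a bdev
 * to appear before giving up. The internal poller above retries every 100 ms.
 * Assumes spdk/bdev.h and spdk/log.h; example_* names are hypothetical.
 */
#include "spdk/bdev.h"
#include "spdk/log.h"

static void
example_async_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	/* Handle SPDK_BDEV_EVENT_REMOVE etc. in real code. */
}

static void
example_open_async_done(struct spdk_bdev_desc *desc, int rc, void *cb_arg)
{
	if (rc != 0) {
		SPDK_ERRLOG("async open failed: %d\n", rc);
		return;
	}
	/* desc is ready; close it with spdk_bdev_close() when finished. */
}

static int
example_open_async(const char *name)
{
	struct spdk_bdev_open_async_opts opts = {
		.size = sizeof(opts),
		.timeout_ms = 10 * 1000,
	};

	return spdk_bdev_open_async(name, false, example_async_event_cb, NULL, &opts,
				    example_open_async_done, NULL);
}
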
8676 : static void
8677 258 : bdev_close(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc)
8678 : {
8679 : int rc;
8680 :
8681 258 : spdk_spin_lock(&bdev->internal.spinlock);
8682 258 : spdk_spin_lock(&desc->spinlock);
8683 :
8684 258 : TAILQ_REMOVE(&bdev->internal.open_descs, desc, link);
8685 :
8686 258 : desc->closed = true;
8687 :
8688 258 : if (desc->claim != NULL) {
8689 16 : bdev_desc_release_claims(desc);
8690 16 : }
8691 :
8692 258 : if (0 == desc->refs) {
8693 247 : spdk_spin_unlock(&desc->spinlock);
8694 247 : bdev_desc_free(desc);
8695 247 : } else {
8696 11 : spdk_spin_unlock(&desc->spinlock);
8697 : }
8698 :
8699 : /* If no more descriptors, kill QoS channel */
8700 258 : if (bdev->internal.qos && TAILQ_EMPTY(&bdev->internal.open_descs)) {
8701 7 : SPDK_DEBUGLOG(bdev, "Closed last descriptor for bdev %s on thread %p. Stopping QoS.\n",
8702 : bdev->name, spdk_get_thread());
8703 :
8704 7 : if (bdev_qos_destroy(bdev)) {
8705 : /* There isn't anything we can do to recover here. Just let the
8706 : * old QoS poller keep running. The QoS handling won't change
8707 : * cores when the user allocates a new channel, but it won't break. */
8708 0 : SPDK_ERRLOG("Unable to shut down QoS poller. It will continue running on the current thread.\n");
8709 0 : }
8710 7 : }
8711 :
8712 258 : if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->internal.open_descs)) {
8713 15 : rc = bdev_unregister_unsafe(bdev);
8714 15 : spdk_spin_unlock(&bdev->internal.spinlock);
8715 :
8716 15 : if (rc == 0) {
8717 15 : spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
8718 15 : }
8719 15 : } else {
8720 243 : spdk_spin_unlock(&bdev->internal.spinlock);
8721 : }
8722 258 : }
8723 :
8724 : void
8725 122 : spdk_bdev_close(struct spdk_bdev_desc *desc)
8726 : {
8727 122 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
8728 :
8729 122 : SPDK_DEBUGLOG(bdev, "Closing descriptor %p for bdev %s on thread %p\n", desc, bdev->name,
8730 : spdk_get_thread());
8731 :
8732 122 : assert(desc->thread == spdk_get_thread());
8733 :
8734 122 : spdk_poller_unregister(&desc->io_timeout_poller);
8735 :
8736 122 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8737 :
8738 122 : bdev_close(bdev, desc);
8739 :
8740 122 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8741 122 : }
8742 :
8743 : int32_t
8744 3 : spdk_bdev_get_numa_id(struct spdk_bdev *bdev)
8745 : {
8746 3 : if (bdev->numa.id_valid) {
8747 2 : return bdev->numa.id;
8748 : } else {
8749 1 : return SPDK_ENV_NUMA_ID_ANY;
8750 : }
8751 3 : }
8752 :
8753 : static void
8754 125 : bdev_register_finished(void *arg)
8755 : {
8756 125 : struct spdk_bdev_desc *desc = arg;
8757 125 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
8758 :
8759 125 : spdk_notify_send("bdev_register", spdk_bdev_get_name(bdev));
8760 :
8761 125 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8762 :
8763 125 : bdev_close(bdev, desc);
8764 :
8765 125 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8766 125 : }
8767 :
8768 : int
8769 128 : spdk_bdev_register(struct spdk_bdev *bdev)
8770 : {
8771 : struct spdk_bdev_desc *desc;
8772 128 : struct spdk_thread *thread = spdk_get_thread();
8773 : int rc;
8774 :
8775 128 : if (spdk_unlikely(!spdk_thread_is_app_thread(NULL))) {
8776 1 : SPDK_ERRLOG("Cannot register bdev %s on thread %p (%s)\n", bdev->name, thread,
8777 : thread ? spdk_thread_get_name(thread) : "null");
8778 1 : return -EINVAL;
8779 : }
8780 :
8781 127 : rc = bdev_register(bdev);
8782 127 : if (rc != 0) {
8783 2 : return rc;
8784 : }
8785 :
8786 : /* A descriptor is opened to prevent bdev deletion during examination */
8787 125 : rc = bdev_desc_alloc(bdev, _tmp_bdev_event_cb, NULL, NULL, &desc);
8788 125 : if (rc != 0) {
8789 0 : spdk_bdev_unregister(bdev, NULL, NULL);
8790 0 : return rc;
8791 : }
8792 :
8793 125 : rc = bdev_open(bdev, false, desc);
8794 125 : if (rc != 0) {
8795 0 : bdev_desc_free(desc);
8796 0 : spdk_bdev_unregister(bdev, NULL, NULL);
8797 0 : return rc;
8798 : }
8799 :
8800 : /* Examine configuration before initializing I/O */
8801 125 : bdev_examine(bdev);
8802 :
8803 125 : rc = spdk_bdev_wait_for_examine(bdev_register_finished, desc);
8804 125 : if (rc != 0) {
8805 0 : bdev_close(bdev, desc);
8806 0 : spdk_bdev_unregister(bdev, NULL, NULL);
8807 0 : }
8808 :
8809 125 : return rc;
8810 128 : }
8811 :
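/*
 * Illustrative sketch, not part of bdev.c: how a backend module typically fills
 * in and registers its bdev. The struct spdk_bdev field names are taken from
 * spdk/bdev_module.h; the fn_table, module object and all example_* names are
 * hypothetical and assumed to be defined by the module. Must run on the app
 * thread (enforced above).
 */
#include "spdk/stdinc.h"
#include "spdk/bdev_module.h"

extern const struct spdk_bdev_fn_table example_fn_table;	/* destruct, submit_request, ... */
extern struct spdk_bdev_module example_if;

struct example_disk {
	struct spdk_bdev	bdev;
	/* backend-specific state would live here */
};

static int
example_create_bdev(struct example_disk *disk, const char *name, uint64_t num_blocks)
{
	struct spdk_bdev *bdev = &disk->bdev;	/* disk assumed zeroed, e.g. calloc'd */
	int rc;

	bdev->name = strdup(name);
	if (bdev->name == NULL) {
		return -ENOMEM;
	}
	bdev->product_name = "example disk";
	bdev->blocklen = 512;
	bdev->blockcnt = num_blocks;
	bdev->ctxt = disk;
	bdev->fn_table = &example_fn_table;
	bdev->module = &example_if;

	rc = spdk_bdev_register(bdev);
	if (rc != 0) {
		free(bdev->name);
		bdev->name = NULL;
	}
	return rc;
}
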
8812 : int
8813 26 : spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
8814 : struct spdk_bdev_module *module)
8815 : {
8816 26 : spdk_spin_lock(&bdev->internal.spinlock);
8817 :
8818 26 : if (bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE) {
8819 6 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
8820 6 : spdk_spin_unlock(&bdev->internal.spinlock);
8821 6 : return -EPERM;
8822 : }
8823 :
8824 20 : if (desc && !desc->write) {
8825 5 : desc->write = true;
8826 5 : }
8827 :
8828 20 : bdev->internal.claim_type = SPDK_BDEV_CLAIM_EXCL_WRITE;
8829 20 : bdev->internal.claim.v1.module = module;
8830 :
8831 20 : spdk_spin_unlock(&bdev->internal.spinlock);
8832 20 : return 0;
8833 26 : }
8834 :
8835 : void
8836 8 : spdk_bdev_module_release_bdev(struct spdk_bdev *bdev)
8837 : {
8838 8 : spdk_spin_lock(&bdev->internal.spinlock);
8839 :
8840 8 : assert(bdev->internal.claim.v1.module != NULL);
8841 8 : assert(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
8842 8 : bdev->internal.claim_type = SPDK_BDEV_CLAIM_NONE;
8843 8 : bdev->internal.claim.v1.module = NULL;
8844 :
8845 8 : spdk_spin_unlock(&bdev->internal.spinlock);
8846 8 : }
8847 :
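/*
 * Illustrative sketch, not part of bdev.c: the legacy exclusive-write
 * (SPDK_BDEV_CLAIM_EXCL_WRITE) claim/release pair. Assumes an already open
 * descriptor and a module object; example_* names are hypothetical.
 */
#include "spdk/bdev_module.h"

extern struct spdk_bdev_module example_if;

static int
example_claim_v1(struct spdk_bdev_desc *desc)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);

	/* On success the descriptor is promoted to writable; -EPERM means some
	 * other module already holds a claim on this bdev.
	 */
	return spdk_bdev_module_claim_bdev(bdev, desc, &example_if);
}

static void
example_release_v1(struct spdk_bdev *bdev)
{
	/* Only the module holding the EXCL_WRITE claim may release it. */
	spdk_bdev_module_release_bdev(bdev);
}
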
8848 : /*
8849 : * Start claims v2
8850 : */
8851 :
8852 : const char *
8853 24 : spdk_bdev_claim_get_name(enum spdk_bdev_claim_type type)
8854 : {
8855 24 : switch (type) {
8856 : case SPDK_BDEV_CLAIM_NONE:
8857 0 : return "not_claimed";
8858 : case SPDK_BDEV_CLAIM_EXCL_WRITE:
8859 6 : return "exclusive_write";
8860 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE:
8861 7 : return "read_many_write_one";
8862 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE:
8863 5 : return "read_many_write_none";
8864 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
8865 6 : return "read_many_write_many";
8866 : default:
8867 0 : break;
8868 : }
8869 0 : return "invalid_claim";
8870 24 : }
8871 :
8872 : static bool
8873 96 : claim_type_is_v2(enum spdk_bdev_claim_type type)
8874 : {
8875 96 : switch (type) {
8876 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE:
8877 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE:
8878 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
8879 96 : return true;
8880 : default:
8881 0 : break;
8882 : }
8883 0 : return false;
8884 96 : }
8885 :
8886 : /* Returns true if taking a claim with desc->write == false should make the descriptor writable. */
8887 : static bool
8888 13 : claim_type_promotes_to_write(enum spdk_bdev_claim_type type)
8889 : {
8890 13 : switch (type) {
8891 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE:
8892 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
8893 5 : return true;
8894 : default:
8895 8 : break;
8896 : }
8897 8 : return false;
8898 13 : }
8899 :
8900 : void
8901 44 : spdk_bdev_claim_opts_init(struct spdk_bdev_claim_opts *opts, size_t size)
8902 : {
8903 44 : if (opts == NULL) {
8904 0 : SPDK_ERRLOG("opts should not be NULL\n");
8905 0 : assert(opts != NULL);
8906 0 : return;
8907 : }
8908 44 : if (size == 0) {
8909 0 : SPDK_ERRLOG("size should not be zero\n");
8910 0 : assert(size != 0);
8911 0 : return;
8912 : }
8913 :
8914 44 : memset(opts, 0, size);
8915 44 : opts->opts_size = size;
8916 :
8917 : #define FIELD_OK(field) \
8918 : offsetof(struct spdk_bdev_claim_opts, field) + sizeof(opts->field) <= size
8919 :
8920 : #define SET_FIELD(field, value) \
8921 : if (FIELD_OK(field)) { \
8922 : opts->field = value; \
8923 : } \
8924 :
8925 44 : SET_FIELD(shared_claim_key, 0);
8926 :
8927 : #undef FIELD_OK
8928 : #undef SET_FIELD
8929 44 : }
8930 :
8931 : static int
8932 22 : claim_opts_copy(struct spdk_bdev_claim_opts *src, struct spdk_bdev_claim_opts *dst)
8933 : {
8934 22 : if (src->opts_size == 0) {
8935 0 : SPDK_ERRLOG("size should not be zero\n");
8936 0 : return -1;
8937 : }
8938 :
8939 22 : memset(dst, 0, sizeof(*dst));
8940 22 : dst->opts_size = src->opts_size;
8941 :
8942 : #define FIELD_OK(field) \
8943 : offsetof(struct spdk_bdev_claim_opts, field) + sizeof(src->field) <= src->opts_size
8944 :
8945 : #define SET_FIELD(field) \
8946 : if (FIELD_OK(field)) { \
8947 : dst->field = src->field; \
8948 : } \
8949 :
8950 22 : if (FIELD_OK(name)) {
8951 22 : snprintf(dst->name, sizeof(dst->name), "%s", src->name);
8952 22 : }
8953 :
8954 22 : SET_FIELD(shared_claim_key);
8955 :
8956 : /* You should not remove this statement, but need to update the assert statement
8957 : * if you add a new field, and also add a corresponding SET_FIELD statement */
8958 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_claim_opts) == 48, "Incorrect size");
8959 :
8960 : #undef FIELD_OK
8961 : #undef SET_FIELD
8962 22 : return 0;
8963 22 : }
8964 :
8965 : /* Returns 0 if a read-write-once claim can be taken. */
8966 : static int
8967 9 : claim_verify_rwo(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
8968 : struct spdk_bdev_claim_opts *opts, struct spdk_bdev_module *module)
8969 : {
8970 9 : struct spdk_bdev *bdev = desc->bdev;
8971 : struct spdk_bdev_desc *open_desc;
8972 :
8973 9 : assert(spdk_spin_held(&bdev->internal.spinlock));
8974 9 : assert(type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
8975 :
8976 9 : if (opts->shared_claim_key != 0) {
8977 1 : SPDK_ERRLOG("%s: key option not supported with read-write-once claims\n",
8978 : bdev->name);
8979 1 : return -EINVAL;
8980 : }
8981 8 : if (bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE) {
8982 1 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
8983 1 : return -EPERM;
8984 : }
8985 7 : if (desc->claim != NULL) {
8986 0 : SPDK_NOTICELOG("%s: descriptor already claimed bdev with module %s\n",
8987 : bdev->name, desc->claim->module->name);
8988 0 : return -EPERM;
8989 : }
8990 14 : TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
8991 9 : if (desc != open_desc && open_desc->write) {
8992 2 : SPDK_NOTICELOG("%s: Cannot obtain read-write-once claim while "
8993 : "another descriptor is open for writing\n",
8994 : bdev->name);
8995 2 : return -EPERM;
8996 : }
8997 7 : }
8998 :
8999 5 : return 0;
9000 9 : }
9001 :
9002 : /* Returns 0 if a read-only-many claim can be taken. */
9003 : static int
9004 12 : claim_verify_rom(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
9005 : struct spdk_bdev_claim_opts *opts, struct spdk_bdev_module *module)
9006 : {
9007 12 : struct spdk_bdev *bdev = desc->bdev;
9008 : struct spdk_bdev_desc *open_desc;
9009 :
9010 12 : assert(spdk_spin_held(&bdev->internal.spinlock));
9011 12 : assert(type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
9012 12 : assert(desc->claim == NULL);
9013 :
9014 12 : if (desc->write) {
9015 3 : SPDK_ERRLOG("%s: Cannot obtain read-only-many claim with writable descriptor\n",
9016 : bdev->name);
9017 3 : return -EINVAL;
9018 : }
9019 9 : if (opts->shared_claim_key != 0) {
9020 1 : SPDK_ERRLOG("%s: key option not supported with read-only-many claims\n", bdev->name);
9021 1 : return -EINVAL;
9022 : }
9023 8 : if (bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE) {
9024 15 : TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
9025 9 : if (open_desc->write) {
9026 0 : SPDK_NOTICELOG("%s: Cannot obtain read-only-many claim while "
9027 : "another descriptor is open for writing\n",
9028 : bdev->name);
9029 0 : return -EPERM;
9030 : }
9031 9 : }
9032 6 : }
9033 :
9034 8 : return 0;
9035 12 : }
9036 :
9037 : /* Returns 0 if a read-write-many claim can be taken. */
9038 : static int
9039 8 : claim_verify_rwm(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
9040 : struct spdk_bdev_claim_opts *opts, struct spdk_bdev_module *module)
9041 : {
9042 8 : struct spdk_bdev *bdev = desc->bdev;
9043 : struct spdk_bdev_desc *open_desc;
9044 :
9045 8 : assert(spdk_spin_held(&bdev->internal.spinlock));
9046 8 : assert(type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED);
9047 8 : assert(desc->claim == NULL);
9048 :
9049 8 : if (opts->shared_claim_key == 0) {
9050 2 : SPDK_ERRLOG("%s: shared_claim_key option required with read-write-many claims\n",
9051 : bdev->name);
9052 2 : return -EINVAL;
9053 : }
9054 6 : switch (bdev->internal.claim_type) {
9055 : case SPDK_BDEV_CLAIM_NONE:
9056 7 : TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
9057 5 : if (open_desc == desc) {
9058 3 : continue;
9059 : }
9060 2 : if (open_desc->write) {
9061 2 : SPDK_NOTICELOG("%s: Cannot obtain read-write-many claim while "
9062 : "another descriptor is open for writing without a "
9063 : "claim\n", bdev->name);
9064 2 : return -EPERM;
9065 : }
9066 0 : }
9067 2 : break;
9068 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
9069 2 : if (opts->shared_claim_key != bdev->internal.claim.v2.key) {
9070 1 : LOG_ALREADY_CLAIMED_ERROR("already claimed with another key", bdev);
9071 1 : return -EPERM;
9072 : }
9073 1 : break;
9074 : default:
9075 0 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
9076 0 : return -EBUSY;
9077 : }
9078 :
9079 3 : return 0;
9080 8 : }
9081 :
9082 : /* Updates desc and its bdev with a v2 claim. */
9083 : static int
9084 16 : claim_bdev(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
9085 : struct spdk_bdev_claim_opts *opts, struct spdk_bdev_module *module)
9086 : {
9087 16 : struct spdk_bdev *bdev = desc->bdev;
9088 : struct spdk_bdev_module_claim *claim;
9089 :
9090 16 : assert(spdk_spin_held(&bdev->internal.spinlock));
9091 16 : assert(claim_type_is_v2(type));
9092 16 : assert(desc->claim == NULL);
9093 :
9094 16 : claim = calloc(1, sizeof(*desc->claim));
9095 16 : if (claim == NULL) {
9096 0 : SPDK_ERRLOG("%s: out of memory while allocating claim\n", bdev->name);
9097 0 : return -ENOMEM;
9098 : }
9099 16 : claim->module = module;
9100 16 : claim->desc = desc;
9101 : SPDK_STATIC_ASSERT(sizeof(claim->name) == sizeof(opts->name), "sizes must match");
9102 16 : memcpy(claim->name, opts->name, sizeof(claim->name));
9103 16 : desc->claim = claim;
9104 :
9105 16 : if (bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE) {
9106 13 : bdev->internal.claim_type = type;
9107 13 : TAILQ_INIT(&bdev->internal.claim.v2.claims);
9108 13 : bdev->internal.claim.v2.key = opts->shared_claim_key;
9109 13 : }
9110 16 : assert(type == bdev->internal.claim_type);
9111 :
9112 16 : TAILQ_INSERT_TAIL(&bdev->internal.claim.v2.claims, claim, link);
9113 :
9114 16 : if (!desc->write && claim_type_promotes_to_write(type)) {
9115 5 : desc->write = true;
9116 5 : }
9117 :
9118 16 : return 0;
9119 16 : }
9120 :
9121 : int
9122 39 : spdk_bdev_module_claim_bdev_desc(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
9123 : struct spdk_bdev_claim_opts *_opts,
9124 : struct spdk_bdev_module *module)
9125 : {
9126 : struct spdk_bdev *bdev;
9127 : struct spdk_bdev_claim_opts opts;
9128 39 : int rc = 0;
9129 :
9130 39 : if (desc == NULL) {
9131 0 : SPDK_ERRLOG("descriptor must not be NULL\n");
9132 0 : return -EINVAL;
9133 : }
9134 :
9135 39 : bdev = desc->bdev;
9136 :
9137 39 : if (_opts == NULL) {
9138 17 : spdk_bdev_claim_opts_init(&opts, sizeof(opts));
9139 39 : } else if (claim_opts_copy(_opts, &opts) != 0) {
9140 0 : return -EINVAL;
9141 : }
9142 :
9143 39 : spdk_spin_lock(&bdev->internal.spinlock);
9144 :
9145 39 : if (bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE &&
9146 15 : bdev->internal.claim_type != type) {
9147 10 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
9148 10 : spdk_spin_unlock(&bdev->internal.spinlock);
9149 10 : return -EPERM;
9150 : }
9151 :
9152 29 : if (claim_type_is_v2(type) && desc->claim != NULL) {
9153 0 : SPDK_ERRLOG("%s: descriptor already has %s claim with name '%s'\n",
9154 : bdev->name, spdk_bdev_claim_get_name(type), desc->claim->name);
9155 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9156 0 : return -EPERM;
9157 : }
9158 :
9159 29 : switch (type) {
9160 : case SPDK_BDEV_CLAIM_EXCL_WRITE:
9161 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9162 0 : return spdk_bdev_module_claim_bdev(bdev, desc, module);
9163 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE:
9164 9 : rc = claim_verify_rwo(desc, type, &opts, module);
9165 9 : break;
9166 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE:
9167 12 : rc = claim_verify_rom(desc, type, &opts, module);
9168 12 : break;
9169 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
9170 8 : rc = claim_verify_rwm(desc, type, &opts, module);
9171 8 : break;
9172 : default:
9173 0 : SPDK_ERRLOG("%s: claim type %d not supported\n", bdev->name, type);
9174 0 : rc = -ENOTSUP;
9175 0 : }
9176 :
9177 29 : if (rc == 0) {
9178 16 : rc = claim_bdev(desc, type, &opts, module);
9179 16 : }
9180 :
9181 29 : spdk_spin_unlock(&bdev->internal.spinlock);
9182 29 : return rc;
9183 39 : }
9184 :
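/*
 * Illustrative sketch, not part of bdev.c: taking a v2 read-many-write-one
 * claim on an open descriptor. This claim type promotes a read-only descriptor
 * to writable (see claim_type_promotes_to_write above), and the claim is
 * released automatically when the descriptor is closed. example_* names are
 * hypothetical.
 */
#include "spdk/stdinc.h"
#include "spdk/bdev_module.h"

extern struct spdk_bdev_module example_if;

static int
example_claim_rwo(struct spdk_bdev_desc *desc)
{
	struct spdk_bdev_claim_opts opts;

	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
	snprintf(opts.name, sizeof(opts.name), "%s", "example-claim");

	return spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE,
						&opts, &example_if);
}
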
9185 : static void
9186 13 : claim_reset(struct spdk_bdev *bdev)
9187 : {
9188 13 : assert(spdk_spin_held(&bdev->internal.spinlock));
9189 13 : assert(claim_type_is_v2(bdev->internal.claim_type));
9190 13 : assert(TAILQ_EMPTY(&bdev->internal.claim.v2.claims));
9191 :
9192 13 : memset(&bdev->internal.claim, 0, sizeof(bdev->internal.claim));
9193 13 : bdev->internal.claim_type = SPDK_BDEV_CLAIM_NONE;
9194 13 : }
9195 :
9196 : static void
9197 16 : bdev_desc_release_claims(struct spdk_bdev_desc *desc)
9198 : {
9199 16 : struct spdk_bdev *bdev = desc->bdev;
9200 :
9201 16 : assert(spdk_spin_held(&bdev->internal.spinlock));
9202 16 : assert(claim_type_is_v2(bdev->internal.claim_type));
9203 :
9204 16 : if (bdev->internal.examine_in_progress == 0) {
9205 16 : TAILQ_REMOVE(&bdev->internal.claim.v2.claims, desc->claim, link);
9206 16 : free(desc->claim);
9207 16 : if (TAILQ_EMPTY(&bdev->internal.claim.v2.claims)) {
9208 13 : claim_reset(bdev);
9209 13 : }
9210 16 : } else {
9211 : /* This is a dead claim that will be cleaned up when bdev_examine() is done. */
9212 0 : desc->claim->module = NULL;
9213 0 : desc->claim->desc = NULL;
9214 : }
9215 16 : desc->claim = NULL;
9216 16 : }
9217 :
9218 : /*
9219 : * End claims v2
9220 : */
9221 :
9222 : struct spdk_bdev *
9223 1187 : spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc)
9224 : {
9225 1187 : assert(desc != NULL);
9226 1187 : return desc->bdev;
9227 : }
9228 :
9229 : int
9230 1 : spdk_for_each_bdev(void *ctx, spdk_for_each_bdev_fn fn)
9231 : {
9232 : struct spdk_bdev *bdev, *tmp;
9233 : struct spdk_bdev_desc *desc;
9234 1 : int rc = 0;
9235 :
9236 1 : assert(fn != NULL);
9237 :
9238 1 : spdk_spin_lock(&g_bdev_mgr.spinlock);
9239 1 : bdev = spdk_bdev_first();
9240 9 : while (bdev != NULL) {
9241 8 : rc = bdev_desc_alloc(bdev, _tmp_bdev_event_cb, NULL, NULL, &desc);
9242 8 : if (rc != 0) {
9243 0 : break;
9244 : }
9245 8 : rc = bdev_open(bdev, false, desc);
9246 8 : if (rc != 0) {
9247 1 : bdev_desc_free(desc);
9248 1 : if (rc == -ENODEV) {
9249 : /* Ignore the error and move to the next bdev. */
9250 1 : rc = 0;
9251 1 : bdev = spdk_bdev_next(bdev);
9252 1 : continue;
9253 : }
9254 0 : break;
9255 : }
9256 7 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
9257 :
9258 7 : rc = fn(ctx, bdev);
9259 :
9260 7 : spdk_spin_lock(&g_bdev_mgr.spinlock);
9261 7 : tmp = spdk_bdev_next(bdev);
9262 7 : bdev_close(bdev, desc);
9263 7 : if (rc != 0) {
9264 0 : break;
9265 : }
9266 7 : bdev = tmp;
9267 : }
9268 1 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
9269 :
9270 1 : return rc;
9271 : }
9272 :
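/*
 * Illustrative sketch, not part of bdev.c: iterating over all registered bdevs.
 * Each bdev is opened around the callback, so it cannot disappear while the
 * callback runs; returning non-zero stops the iteration. example_* names are
 * hypothetical.
 */
#include "spdk/stdinc.h"
#include "spdk/bdev.h"
#include "spdk/log.h"

static int
example_dump_bdev(void *ctx, struct spdk_bdev *bdev)
{
	SPDK_NOTICELOG("bdev %s: block size %" PRIu32 "\n",
		       spdk_bdev_get_name(bdev), spdk_bdev_get_block_size(bdev));
	return 0;	/* continue with the next bdev */
}

static int
example_dump_all_bdevs(void)
{
	return spdk_for_each_bdev(NULL, example_dump_bdev);
}
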
9273 : int
9274 1 : spdk_for_each_bdev_leaf(void *ctx, spdk_for_each_bdev_fn fn)
9275 : {
9276 : struct spdk_bdev *bdev, *tmp;
9277 : struct spdk_bdev_desc *desc;
9278 1 : int rc = 0;
9279 :
9280 1 : assert(fn != NULL);
9281 :
9282 1 : spdk_spin_lock(&g_bdev_mgr.spinlock);
9283 1 : bdev = spdk_bdev_first_leaf();
9284 6 : while (bdev != NULL) {
9285 5 : rc = bdev_desc_alloc(bdev, _tmp_bdev_event_cb, NULL, NULL, &desc);
9286 5 : if (rc != 0) {
9287 0 : break;
9288 : }
9289 5 : rc = bdev_open(bdev, false, desc);
9290 5 : if (rc != 0) {
9291 1 : bdev_desc_free(desc);
9292 1 : if (rc == -ENODEV) {
9293 : /* Ignore the error and move to the next bdev. */
9294 1 : rc = 0;
9295 1 : bdev = spdk_bdev_next_leaf(bdev);
9296 1 : continue;
9297 : }
9298 0 : break;
9299 : }
9300 4 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
9301 :
9302 4 : rc = fn(ctx, bdev);
9303 :
9304 4 : spdk_spin_lock(&g_bdev_mgr.spinlock);
9305 4 : tmp = spdk_bdev_next_leaf(bdev);
9306 4 : bdev_close(bdev, desc);
9307 4 : if (rc != 0) {
9308 0 : break;
9309 : }
9310 4 : bdev = tmp;
9311 : }
9312 1 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
9313 :
9314 1 : return rc;
9315 : }
9316 :
9317 : void
9318 0 : spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
9319 : {
9320 : struct iovec *iovs;
9321 : int iovcnt;
9322 :
9323 0 : if (bdev_io == NULL) {
9324 0 : return;
9325 : }
9326 :
9327 0 : switch (bdev_io->type) {
9328 : case SPDK_BDEV_IO_TYPE_READ:
9329 : case SPDK_BDEV_IO_TYPE_WRITE:
9330 : case SPDK_BDEV_IO_TYPE_ZCOPY:
9331 0 : iovs = bdev_io->u.bdev.iovs;
9332 0 : iovcnt = bdev_io->u.bdev.iovcnt;
9333 0 : break;
9334 : default:
9335 0 : iovs = NULL;
9336 0 : iovcnt = 0;
9337 0 : break;
9338 : }
9339 :
9340 0 : if (iovp) {
9341 0 : *iovp = iovs;
9342 0 : }
9343 0 : if (iovcntp) {
9344 0 : *iovcntp = iovcnt;
9345 0 : }
9346 0 : }
9347 :
9348 : void *
9349 0 : spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io)
9350 : {
9351 0 : if (bdev_io == NULL) {
9352 0 : return NULL;
9353 : }
9354 :
9355 0 : if (!spdk_bdev_is_md_separate(bdev_io->bdev)) {
9356 0 : return NULL;
9357 : }
9358 :
9359 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ ||
9360 0 : bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
9361 0 : return bdev_io->u.bdev.md_buf;
9362 : }
9363 :
9364 0 : return NULL;
9365 0 : }
9366 :
9367 : void *
9368 0 : spdk_bdev_io_get_cb_arg(struct spdk_bdev_io *bdev_io)
9369 : {
9370 0 : if (bdev_io == NULL) {
9371 0 : assert(false);
9372 : return NULL;
9373 : }
9374 :
9375 0 : return bdev_io->internal.caller_ctx;
9376 : }
9377 :
9378 : void
9379 7 : spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module)
9380 : {
9381 :
9382 7 : if (spdk_bdev_module_list_find(bdev_module->name)) {
9383 0 : SPDK_ERRLOG("ERROR: module '%s' already registered.\n", bdev_module->name);
9384 0 : assert(false);
9385 : }
9386 :
9387 7 : spdk_spin_init(&bdev_module->internal.spinlock);
9388 7 : TAILQ_INIT(&bdev_module->internal.quiesced_ranges);
9389 :
9390 : /*
9391 : * Modules with examine callbacks must be initialized first, so they are
9392 : * ready to handle examine callbacks from later modules that will
9393 : * register physical bdevs.
9394 : */
9395 7 : if (bdev_module->examine_config != NULL || bdev_module->examine_disk != NULL) {
9396 4 : TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq);
9397 4 : } else {
9398 3 : TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq);
9399 : }
9400 7 : }
9401 :
9402 : struct spdk_bdev_module *
9403 7 : spdk_bdev_module_list_find(const char *name)
9404 : {
9405 : struct spdk_bdev_module *bdev_module;
9406 :
9407 14 : TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
9408 7 : if (strcmp(name, bdev_module->name) == 0) {
9409 0 : break;
9410 : }
9411 7 : }
9412 :
9413 7 : return bdev_module;
9414 : }
9415 :
9416 : static int
9417 6 : bdev_write_zero_buffer(struct spdk_bdev_io *bdev_io)
9418 : {
9419 : uint64_t num_blocks;
9420 6 : void *md_buf = NULL;
9421 :
9422 6 : num_blocks = bdev_io->u.bdev.num_blocks;
9423 :
9424 6 : if (spdk_bdev_is_md_separate(bdev_io->bdev)) {
9425 4 : md_buf = (char *)g_bdev_mgr.zero_buffer +
9426 2 : spdk_bdev_get_block_size(bdev_io->bdev) * num_blocks;
9427 2 : }
9428 :
9429 12 : return bdev_write_blocks_with_md(bdev_io->internal.desc,
9430 6 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
9431 6 : g_bdev_mgr.zero_buffer, md_buf,
9432 6 : bdev_io->u.bdev.offset_blocks, num_blocks,
9433 6 : bdev_write_zero_buffer_done, bdev_io);
9434 : }
9435 :
9436 : static void
9437 6 : bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
9438 : {
9439 6 : struct spdk_bdev_io *parent_io = cb_arg;
9440 :
9441 6 : spdk_bdev_free_io(bdev_io);
9442 :
9443 6 : parent_io->internal.status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
9444 6 : parent_io->internal.cb(parent_io, success, parent_io->internal.caller_ctx);
9445 6 : }
9446 :
9447 : static void
9448 10 : bdev_set_qos_limit_done(struct set_qos_limit_ctx *ctx, int status)
9449 : {
9450 10 : spdk_spin_lock(&ctx->bdev->internal.spinlock);
9451 10 : ctx->bdev->internal.qos_mod_in_progress = false;
9452 10 : spdk_spin_unlock(&ctx->bdev->internal.spinlock);
9453 :
9454 10 : if (ctx->cb_fn) {
9455 8 : ctx->cb_fn(ctx->cb_arg, status);
9456 8 : }
9457 10 : free(ctx);
9458 10 : }
9459 :
9460 : static void
9461 2 : bdev_disable_qos_done(void *cb_arg)
9462 : {
9463 2 : struct set_qos_limit_ctx *ctx = cb_arg;
9464 2 : struct spdk_bdev *bdev = ctx->bdev;
9465 : struct spdk_bdev_qos *qos;
9466 :
9467 2 : spdk_spin_lock(&bdev->internal.spinlock);
9468 2 : qos = bdev->internal.qos;
9469 2 : bdev->internal.qos = NULL;
9470 2 : spdk_spin_unlock(&bdev->internal.spinlock);
9471 :
9472 2 : if (qos->thread != NULL) {
9473 2 : spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
9474 2 : spdk_poller_unregister(&qos->poller);
9475 2 : }
9476 :
9477 2 : free(qos);
9478 :
9479 2 : bdev_set_qos_limit_done(ctx, 0);
9480 2 : }
9481 :
9482 : static void
9483 2 : bdev_disable_qos_msg_done(struct spdk_bdev *bdev, void *_ctx, int status)
9484 : {
9485 2 : struct set_qos_limit_ctx *ctx = _ctx;
9486 : struct spdk_thread *thread;
9487 :
9488 2 : spdk_spin_lock(&bdev->internal.spinlock);
9489 2 : thread = bdev->internal.qos->thread;
9490 2 : spdk_spin_unlock(&bdev->internal.spinlock);
9491 :
9492 2 : if (thread != NULL) {
9493 2 : spdk_thread_send_msg(thread, bdev_disable_qos_done, ctx);
9494 2 : } else {
9495 0 : bdev_disable_qos_done(ctx);
9496 : }
9497 2 : }
9498 :
9499 : static void
9500 4 : bdev_disable_qos_msg(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9501 : struct spdk_io_channel *ch, void *_ctx)
9502 : {
9503 4 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(ch);
9504 : struct spdk_bdev_io *bdev_io;
9505 :
9506 4 : bdev_ch->flags &= ~BDEV_CH_QOS_ENABLED;
9507 :
9508 6 : while (!TAILQ_EMPTY(&bdev_ch->qos_queued_io)) {
9509 : /* Re-submit the queued I/O. */
9510 2 : bdev_io = TAILQ_FIRST(&bdev_ch->qos_queued_io);
9511 2 : TAILQ_REMOVE(&bdev_ch->qos_queued_io, bdev_io, internal.link);
9512 2 : _bdev_io_submit(bdev_io);
9513 : }
9514 :
9515 4 : spdk_bdev_for_each_channel_continue(i, 0);
9516 4 : }
9517 :
9518 : static void
9519 1 : bdev_update_qos_rate_limit_msg(void *cb_arg)
9520 : {
9521 1 : struct set_qos_limit_ctx *ctx = cb_arg;
9522 1 : struct spdk_bdev *bdev = ctx->bdev;
9523 :
9524 1 : spdk_spin_lock(&bdev->internal.spinlock);
9525 1 : bdev_qos_update_max_quota_per_timeslice(bdev->internal.qos);
9526 1 : spdk_spin_unlock(&bdev->internal.spinlock);
9527 :
9528 1 : bdev_set_qos_limit_done(ctx, 0);
9529 1 : }
9530 :
9531 : static void
9532 9 : bdev_enable_qos_msg(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9533 : struct spdk_io_channel *ch, void *_ctx)
9534 : {
9535 9 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(ch);
9536 :
9537 9 : spdk_spin_lock(&bdev->internal.spinlock);
9538 9 : bdev_enable_qos(bdev, bdev_ch);
9539 9 : spdk_spin_unlock(&bdev->internal.spinlock);
9540 9 : spdk_bdev_for_each_channel_continue(i, 0);
9541 9 : }
9542 :
9543 : static void
9544 6 : bdev_enable_qos_done(struct spdk_bdev *bdev, void *_ctx, int status)
9545 : {
9546 6 : struct set_qos_limit_ctx *ctx = _ctx;
9547 :
9548 6 : bdev_set_qos_limit_done(ctx, status);
9549 6 : }
9550 :
9551 : static void
9552 7 : bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits)
9553 : {
9554 : int i;
9555 :
9556 7 : assert(bdev->internal.qos != NULL);
9557 :
9558 35 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
9559 28 : if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
9560 28 : bdev->internal.qos->rate_limits[i].limit = limits[i];
9561 :
9562 28 : if (limits[i] == 0) {
9563 19 : bdev->internal.qos->rate_limits[i].limit =
9564 : SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
9565 19 : }
9566 28 : }
9567 28 : }
9568 7 : }
9569 :
9570 : void
9571 9 : spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits,
9572 : void (*cb_fn)(void *cb_arg, int status), void *cb_arg)
9573 : {
9574 : struct set_qos_limit_ctx *ctx;
9575 : uint32_t limit_set_complement;
9576 : uint64_t min_limit_per_sec;
9577 : int i;
9578 9 : bool disable_rate_limit = true;
9579 :
9580 45 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
9581 36 : if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
9582 0 : continue;
9583 : }
9584 :
9585 36 : if (limits[i] > 0) {
9586 10 : disable_rate_limit = false;
9587 10 : }
9588 :
9589 36 : if (bdev_qos_is_iops_rate_limit(i) == true) {
9590 9 : min_limit_per_sec = SPDK_BDEV_QOS_MIN_IOS_PER_SEC;
9591 9 : } else {
9592 27 : if (limits[i] > SPDK_BDEV_QOS_MAX_MBYTES_PER_SEC) {
9593 0 : SPDK_WARNLOG("Requested rate limit %" PRIu64 " will result in uint64_t overflow, "
9594 : "reset to %" PRIu64 "\n", limits[i], SPDK_BDEV_QOS_MAX_MBYTES_PER_SEC);
9595 0 : limits[i] = SPDK_BDEV_QOS_MAX_MBYTES_PER_SEC;
9596 0 : }
9597 : /* Change from megabyte to byte rate limit */
9598 27 : limits[i] = limits[i] * 1024 * 1024;
9599 27 : min_limit_per_sec = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC;
9600 : }
9601 :
9602 36 : limit_set_complement = limits[i] % min_limit_per_sec;
9603 36 : if (limit_set_complement) {
9604 0 : SPDK_ERRLOG("Requested rate limit %" PRIu64 " is not a multiple of %" PRIu64 "\n",
9605 : limits[i], min_limit_per_sec);
9606 0 : limits[i] += min_limit_per_sec - limit_set_complement;
9607 0 : SPDK_ERRLOG("Round up the rate limit to %" PRIu64 "\n", limits[i]);
9608 0 : }
9609 36 : }
9610 :
9611 9 : ctx = calloc(1, sizeof(*ctx));
9612 9 : if (ctx == NULL) {
9613 0 : cb_fn(cb_arg, -ENOMEM);
9614 0 : return;
9615 : }
9616 :
9617 9 : ctx->cb_fn = cb_fn;
9618 9 : ctx->cb_arg = cb_arg;
9619 9 : ctx->bdev = bdev;
9620 :
9621 9 : spdk_spin_lock(&bdev->internal.spinlock);
9622 9 : if (bdev->internal.qos_mod_in_progress) {
9623 1 : spdk_spin_unlock(&bdev->internal.spinlock);
9624 1 : free(ctx);
9625 1 : cb_fn(cb_arg, -EAGAIN);
9626 1 : return;
9627 : }
9628 8 : bdev->internal.qos_mod_in_progress = true;
9629 :
9630 8 : if (disable_rate_limit == true && bdev->internal.qos) {
9631 10 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
9632 8 : if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED &&
9633 0 : (bdev->internal.qos->rate_limits[i].limit > 0 &&
9634 0 : bdev->internal.qos->rate_limits[i].limit !=
9635 : SPDK_BDEV_QOS_LIMIT_NOT_DEFINED)) {
9636 0 : disable_rate_limit = false;
9637 0 : break;
9638 : }
9639 8 : }
9640 2 : }
9641 :
9642 8 : if (disable_rate_limit == false) {
9643 5 : if (bdev->internal.qos == NULL) {
9644 4 : bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos));
9645 4 : if (!bdev->internal.qos) {
9646 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9647 0 : SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n");
9648 0 : bdev_set_qos_limit_done(ctx, -ENOMEM);
9649 0 : return;
9650 : }
9651 4 : }
9652 :
9653 5 : if (bdev->internal.qos->thread == NULL) {
9654 : /* Enabling */
9655 4 : bdev_set_qos_rate_limits(bdev, limits);
9656 :
9657 4 : spdk_bdev_for_each_channel(bdev, bdev_enable_qos_msg, ctx,
9658 : bdev_enable_qos_done);
9659 4 : } else {
9660 : /* Updating */
9661 1 : bdev_set_qos_rate_limits(bdev, limits);
9662 :
9663 2 : spdk_thread_send_msg(bdev->internal.qos->thread,
9664 1 : bdev_update_qos_rate_limit_msg, ctx);
9665 : }
9666 5 : } else {
9667 3 : if (bdev->internal.qos != NULL) {
9668 2 : bdev_set_qos_rate_limits(bdev, limits);
9669 :
9670 : /* Disabling */
9671 2 : spdk_bdev_for_each_channel(bdev, bdev_disable_qos_msg, ctx,
9672 : bdev_disable_qos_msg_done);
9673 2 : } else {
9674 1 : spdk_spin_unlock(&bdev->internal.spinlock);
9675 1 : bdev_set_qos_limit_done(ctx, 0);
9676 1 : return;
9677 : }
9678 : }
9679 :
9680 7 : spdk_spin_unlock(&bdev->internal.spinlock);
9681 9 : }
9682 :
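/*
 * Illustrative sketch, not part of bdev.c: enabling a 10,000 IOPS limit on a
 * bdev. The slot index SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT is assumed from
 * enum spdk_bdev_qos_rate_limit_type in spdk/bdev.h; byte-rate slots are given
 * in MB/s. A value of 0 disables a limit and SPDK_BDEV_QOS_LIMIT_NOT_DEFINED
 * leaves it unchanged. example_* names are hypothetical.
 */
#include "spdk/bdev.h"
#include "spdk/log.h"

static void
example_qos_done(void *cb_arg, int status)
{
	SPDK_NOTICELOG("QoS update finished: %d\n", status);
}

static void
example_set_iops_limit(struct spdk_bdev *bdev)
{
	uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES] = {
		[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 10000,
		/* remaining slots stay 0, i.e. any existing limits are disabled */
	};

	spdk_bdev_set_qos_rate_limits(bdev, limits, example_qos_done, NULL);
}
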
9683 : struct spdk_bdev_histogram_ctx {
9684 : spdk_bdev_histogram_status_cb cb_fn;
9685 : void *cb_arg;
9686 : struct spdk_bdev *bdev;
9687 : int status;
9688 : };
9689 :
9690 : static void
9691 2 : bdev_histogram_disable_channel_cb(struct spdk_bdev *bdev, void *_ctx, int status)
9692 : {
9693 2 : struct spdk_bdev_histogram_ctx *ctx = _ctx;
9694 :
9695 2 : spdk_spin_lock(&ctx->bdev->internal.spinlock);
9696 2 : ctx->bdev->internal.histogram_in_progress = false;
9697 2 : spdk_spin_unlock(&ctx->bdev->internal.spinlock);
9698 2 : ctx->cb_fn(ctx->cb_arg, ctx->status);
9699 2 : free(ctx);
9700 2 : }
9701 :
9702 : static void
9703 3 : bdev_histogram_disable_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9704 : struct spdk_io_channel *_ch, void *_ctx)
9705 : {
9706 3 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
9707 :
9708 3 : if (ch->histogram != NULL) {
9709 3 : spdk_histogram_data_free(ch->histogram);
9710 3 : ch->histogram = NULL;
9711 3 : }
9712 3 : spdk_bdev_for_each_channel_continue(i, 0);
9713 3 : }
9714 :
9715 : static void
9716 2 : bdev_histogram_enable_channel_cb(struct spdk_bdev *bdev, void *_ctx, int status)
9717 : {
9718 2 : struct spdk_bdev_histogram_ctx *ctx = _ctx;
9719 :
9720 2 : if (status != 0) {
9721 0 : ctx->status = status;
9722 0 : ctx->bdev->internal.histogram_enabled = false;
9723 0 : spdk_bdev_for_each_channel(ctx->bdev, bdev_histogram_disable_channel, ctx,
9724 : bdev_histogram_disable_channel_cb);
9725 0 : } else {
9726 2 : spdk_spin_lock(&ctx->bdev->internal.spinlock);
9727 2 : ctx->bdev->internal.histogram_in_progress = false;
9728 2 : spdk_spin_unlock(&ctx->bdev->internal.spinlock);
9729 2 : ctx->cb_fn(ctx->cb_arg, ctx->status);
9730 2 : free(ctx);
9731 : }
9732 2 : }
9733 :
9734 : static void
9735 3 : bdev_histogram_enable_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9736 : struct spdk_io_channel *_ch, void *_ctx)
9737 : {
9738 3 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
9739 3 : int status = 0;
9740 :
9741 3 : if (ch->histogram == NULL) {
9742 3 : ch->histogram = spdk_histogram_data_alloc();
9743 3 : if (ch->histogram == NULL) {
9744 0 : status = -ENOMEM;
9745 0 : }
9746 3 : }
9747 :
9748 3 : spdk_bdev_for_each_channel_continue(i, status);
9749 3 : }
9750 :
9751 : void
9752 4 : spdk_bdev_histogram_enable_ext(struct spdk_bdev *bdev, spdk_bdev_histogram_status_cb cb_fn,
9753 : void *cb_arg, bool enable, struct spdk_bdev_enable_histogram_opts *opts)
9754 : {
9755 : struct spdk_bdev_histogram_ctx *ctx;
9756 :
9757 4 : ctx = calloc(1, sizeof(struct spdk_bdev_histogram_ctx));
9758 4 : if (ctx == NULL) {
9759 0 : cb_fn(cb_arg, -ENOMEM);
9760 0 : return;
9761 : }
9762 :
9763 4 : ctx->bdev = bdev;
9764 4 : ctx->status = 0;
9765 4 : ctx->cb_fn = cb_fn;
9766 4 : ctx->cb_arg = cb_arg;
9767 :
9768 4 : spdk_spin_lock(&bdev->internal.spinlock);
9769 4 : if (bdev->internal.histogram_in_progress) {
9770 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9771 0 : free(ctx);
9772 0 : cb_fn(cb_arg, -EAGAIN);
9773 0 : return;
9774 : }
9775 :
9776 4 : bdev->internal.histogram_in_progress = true;
9777 4 : spdk_spin_unlock(&bdev->internal.spinlock);
9778 :
9779 4 : bdev->internal.histogram_enabled = enable;
9780 4 : bdev->internal.histogram_io_type = opts->io_type;
9781 :
9782 4 : if (enable) {
9783 : /* Allocate histogram for each channel */
9784 2 : spdk_bdev_for_each_channel(bdev, bdev_histogram_enable_channel, ctx,
9785 : bdev_histogram_enable_channel_cb);
9786 2 : } else {
9787 2 : spdk_bdev_for_each_channel(bdev, bdev_histogram_disable_channel, ctx,
9788 : bdev_histogram_disable_channel_cb);
9789 : }
9790 4 : }
9791 :
9792 : void
9793 4 : spdk_bdev_enable_histogram_opts_init(struct spdk_bdev_enable_histogram_opts *opts, size_t size)
9794 : {
9795 4 : if (opts == NULL) {
9796 0 : SPDK_ERRLOG("opts should not be NULL\n");
9797 0 : assert(opts != NULL);
9798 0 : return;
9799 : }
9800 4 : if (size == 0) {
9801 0 : SPDK_ERRLOG("size should not be zero\n");
9802 0 : assert(size != 0);
9803 0 : return;
9804 : }
9805 :
9806 4 : memset(opts, 0, size);
9807 4 : opts->size = size;
9808 :
9809 : #define FIELD_OK(field) \
9810 : offsetof(struct spdk_bdev_enable_histogram_opts, field) + sizeof(opts->field) <= size
9811 :
9812 : #define SET_FIELD(field, value) \
9813 : if (FIELD_OK(field)) { \
9814 : opts->field = value; \
9815 : } \
9816 :
9817 4 : SET_FIELD(io_type, 0);
9818 :
9819 : /* Do not remove this statement. If you add a new field, update the assert
9820 : * below and add a corresponding SET_FIELD statement for it. */
9821 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_enable_histogram_opts) == 9, "Incorrect size");
9822 :
9823 : #undef FIELD_OK
9824 : #undef SET_FIELD
9825 4 : }
9826 :
9827 : void
9828 4 : spdk_bdev_histogram_enable(struct spdk_bdev *bdev, spdk_bdev_histogram_status_cb cb_fn,
9829 : void *cb_arg, bool enable)
9830 : {
9831 : struct spdk_bdev_enable_histogram_opts opts;
9832 :
9833 4 : spdk_bdev_enable_histogram_opts_init(&opts, sizeof(opts));
9834 4 : spdk_bdev_histogram_enable_ext(bdev, cb_fn, cb_arg, enable, &opts);
9835 4 : }
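
/*
 * Usage sketch, for illustration only: enable latency histogram collection on a
 * bdev and report the outcome from the status callback. The example_* function
 * and variable names below are hypothetical; the declarations they rely on come
 * from spdk/bdev.h, which this file already includes.
 */
static void
example_histogram_enabled_cb(void *cb_arg, int status)
{
	/* status is 0 on success or a negative errno such as -ENOMEM or -EAGAIN. */
	SPDK_NOTICELOG("histogram enable finished with status %d\n", status);
}

static void
example_enable_histogram(struct spdk_bdev *bdev)
{
	struct spdk_bdev_enable_histogram_opts opts;

	/* Initialize opts first so all fields get their defaults (io_type stays 0). */
	spdk_bdev_enable_histogram_opts_init(&opts, sizeof(opts));
	spdk_bdev_histogram_enable_ext(bdev, example_histogram_enabled_cb, NULL, true, &opts);
}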
9836 :
9837 : struct spdk_bdev_histogram_data_ctx {
9838 : spdk_bdev_histogram_data_cb cb_fn;
9839 : void *cb_arg;
9840 : struct spdk_bdev *bdev;
9841 : /** merged histogram data from all channels */
9842 : struct spdk_histogram_data *histogram;
9843 : };
9844 :
9845 : static void
9846 5 : bdev_histogram_get_channel_cb(struct spdk_bdev *bdev, void *_ctx, int status)
9847 : {
9848 5 : struct spdk_bdev_histogram_data_ctx *ctx = _ctx;
9849 :
9850 5 : ctx->cb_fn(ctx->cb_arg, status, ctx->histogram);
9851 5 : free(ctx);
9852 5 : }
9853 :
9854 : static void
9855 7 : bdev_histogram_get_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9856 : struct spdk_io_channel *_ch, void *_ctx)
9857 : {
9858 7 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
9859 7 : struct spdk_bdev_histogram_data_ctx *ctx = _ctx;
9860 7 : int status = 0;
9861 :
9862 7 : if (ch->histogram == NULL) {
9863 1 : status = -EFAULT;
9864 1 : } else {
9865 6 : spdk_histogram_data_merge(ctx->histogram, ch->histogram);
9866 : }
9867 :
9868 7 : spdk_bdev_for_each_channel_continue(i, status);
9869 7 : }
9870 :
9871 : void
9872 5 : spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data *histogram,
9873 : spdk_bdev_histogram_data_cb cb_fn,
9874 : void *cb_arg)
9875 : {
9876 : struct spdk_bdev_histogram_data_ctx *ctx;
9877 :
9878 5 : ctx = calloc(1, sizeof(struct spdk_bdev_histogram_data_ctx));
9879 5 : if (ctx == NULL) {
9880 0 : cb_fn(cb_arg, -ENOMEM, NULL);
9881 0 : return;
9882 : }
9883 :
9884 5 : ctx->bdev = bdev;
9885 5 : ctx->cb_fn = cb_fn;
9886 5 : ctx->cb_arg = cb_arg;
9887 :
9888 5 : ctx->histogram = histogram;
9889 :
9890 5 : spdk_bdev_for_each_channel(bdev, bdev_histogram_get_channel, ctx,
9891 : bdev_histogram_get_channel_cb);
9892 5 : }
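
/*
 * Usage sketch, for illustration only: gather the histogram data merged across
 * every channel of a bdev. The example_* names below are hypothetical;
 * spdk_histogram_data_alloc()/spdk_histogram_data_free() are declared in
 * spdk/histogram_data.h.
 */
static void
example_histogram_data_cb(void *cb_arg, int status, struct spdk_histogram_data *histogram)
{
	if (status != 0) {
		SPDK_ERRLOG("collecting histogram failed: %d\n", status);
	} else {
		/* The merged per-channel data is now in 'histogram'; consume it here. */
	}

	spdk_histogram_data_free(histogram);
}

static void
example_get_histogram(struct spdk_bdev *bdev)
{
	struct spdk_histogram_data *histogram;

	histogram = spdk_histogram_data_alloc();
	if (histogram == NULL) {
		SPDK_ERRLOG("failed to allocate histogram\n");
		return;
	}

	spdk_bdev_histogram_get(bdev, histogram, example_histogram_data_cb, NULL);
}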
9893 :
9894 : void
9895 2 : spdk_bdev_channel_get_histogram(struct spdk_io_channel *ch, spdk_bdev_histogram_data_cb cb_fn,
9896 : void *cb_arg)
9897 : {
9898 2 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(ch);
9899 2 : int status = 0;
9900 :
9901 2 : assert(cb_fn != NULL);
9902 :
9903 2 : if (bdev_ch->histogram == NULL) {
9904 1 : status = -EFAULT;
9905 1 : }
9906 2 : cb_fn(cb_arg, status, bdev_ch->histogram);
9907 2 : }
9908 :
9909 : size_t
9910 0 : spdk_bdev_get_media_events(struct spdk_bdev_desc *desc, struct spdk_bdev_media_event *events,
9911 : size_t max_events)
9912 : {
9913 : struct media_event_entry *entry;
9914 0 : size_t num_events = 0;
9915 :
9916 0 : for (; num_events < max_events; ++num_events) {
9917 0 : entry = TAILQ_FIRST(&desc->pending_media_events);
9918 0 : if (entry == NULL) {
9919 0 : break;
9920 : }
9921 :
9922 0 : events[num_events] = entry->event;
9923 0 : TAILQ_REMOVE(&desc->pending_media_events, entry, tailq);
9924 0 : TAILQ_INSERT_TAIL(&desc->free_media_events, entry, tailq);
9925 0 : }
9926 :
9927 0 : return num_events;
9928 : }
9929 :
9930 : int
9931 0 : spdk_bdev_push_media_events(struct spdk_bdev *bdev, const struct spdk_bdev_media_event *events,
9932 : size_t num_events)
9933 : {
9934 : struct spdk_bdev_desc *desc;
9935 : struct media_event_entry *entry;
9936 : size_t event_id;
9937 0 : int rc = 0;
9938 :
9939 0 : assert(bdev->media_events);
9940 :
9941 0 : spdk_spin_lock(&bdev->internal.spinlock);
9942 0 : TAILQ_FOREACH(desc, &bdev->internal.open_descs, link) {
9943 0 : if (desc->write) {
9944 0 : break;
9945 : }
9946 0 : }
9947 :
9948 0 : if (desc == NULL || desc->media_events_buffer == NULL) {
9949 0 : rc = -ENODEV;
9950 0 : goto out;
9951 : }
9952 :
9953 0 : for (event_id = 0; event_id < num_events; ++event_id) {
9954 0 : entry = TAILQ_FIRST(&desc->free_media_events);
9955 0 : if (entry == NULL) {
9956 0 : break;
9957 : }
9958 :
9959 0 : TAILQ_REMOVE(&desc->free_media_events, entry, tailq);
9960 0 : TAILQ_INSERT_TAIL(&desc->pending_media_events, entry, tailq);
9961 0 : entry->event = events[event_id];
9962 0 : }
9963 :
9964 0 : rc = event_id;
9965 : out:
9966 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9967 0 : return rc;
9968 : }
9969 :
9970 : static void
9971 0 : _media_management_notify(void *arg)
9972 : {
9973 0 : struct spdk_bdev_desc *desc = arg;
9974 :
9975 0 : _event_notify(desc, SPDK_BDEV_EVENT_MEDIA_MANAGEMENT);
9976 0 : }
9977 :
9978 : void
9979 0 : spdk_bdev_notify_media_management(struct spdk_bdev *bdev)
9980 : {
9981 : struct spdk_bdev_desc *desc;
9982 :
9983 0 : spdk_spin_lock(&bdev->internal.spinlock);
9984 0 : TAILQ_FOREACH(desc, &bdev->internal.open_descs, link) {
9985 0 : if (!TAILQ_EMPTY(&desc->pending_media_events)) {
9986 0 : event_notify(desc, _media_management_notify);
9987 0 : }
9988 0 : }
9989 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9990 0 : }
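
/*
 * Usage sketch, for illustration only: drain pending media events from an open
 * descriptor after its event callback receives SPDK_BDEV_EVENT_MEDIA_MANAGEMENT.
 * The function name and batch size below are hypothetical.
 */
static void
example_drain_media_events(struct spdk_bdev_desc *desc)
{
	struct spdk_bdev_media_event events[16];
	size_t i, num_events;

	do {
		num_events = spdk_bdev_get_media_events(desc, events, SPDK_COUNTOF(events));
		for (i = 0; i < num_events; i++) {
			/* Handle events[i] here, e.g. schedule relocation of the reported range. */
		}
	} while (num_events == SPDK_COUNTOF(events));
}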
9991 :
9992 : struct locked_lba_range_ctx {
9993 : struct lba_range range;
9994 : struct lba_range *current_range;
9995 : struct lba_range *owner_range;
9996 : struct spdk_poller *poller;
9997 : lock_range_cb cb_fn;
9998 : void *cb_arg;
9999 : };
10000 :
10001 : static void
10002 0 : bdev_lock_error_cleanup_cb(struct spdk_bdev *bdev, void *_ctx, int status)
10003 : {
10004 0 : struct locked_lba_range_ctx *ctx = _ctx;
10005 :
10006 0 : ctx->cb_fn(&ctx->range, ctx->cb_arg, -ENOMEM);
10007 0 : free(ctx);
10008 0 : }
10009 :
10010 : static void bdev_unlock_lba_range_get_channel(struct spdk_bdev_channel_iter *i,
10011 : struct spdk_bdev *bdev, struct spdk_io_channel *ch, void *_ctx);
10012 :
10013 : static void
10014 14 : bdev_lock_lba_range_cb(struct spdk_bdev *bdev, void *_ctx, int status)
10015 : {
10016 14 : struct locked_lba_range_ctx *ctx = _ctx;
10017 :
10018 14 : if (status == -ENOMEM) {
10019 : /* One of the channels could not allocate a range object.
10020 : * So we have to go back and clean up any ranges that were
10021 : * allocated successfully before we return error status to
10022 : * the caller. We can reuse the unlock function to do that
10023 : * clean up.
10024 : * cleanup.
10025 0 : spdk_bdev_for_each_channel(bdev, bdev_unlock_lba_range_get_channel, ctx,
10026 : bdev_lock_error_cleanup_cb);
10027 0 : return;
10028 : }
10029 :
10030 : /* All channels have locked this range and no I/O overlapping the range
10031 : * is outstanding! Set the owner_ch for the range object for the
10032 : * locking channel, so that this channel will know that it is allowed
10033 : * to write to this range.
10034 : */
10035 14 : if (ctx->owner_range != NULL) {
10036 10 : ctx->owner_range->owner_ch = ctx->range.owner_ch;
10037 10 : }
10038 :
10039 14 : ctx->cb_fn(&ctx->range, ctx->cb_arg, status);
10040 :
10041 : /* Don't free the ctx here. Its range is in the bdev's global list of
10042 : * locked ranges still, and will be removed and freed when this range
10043 : * is later unlocked.
10044 : */
10045 14 : }
10046 :
10047 : static int
10048 17 : bdev_lock_lba_range_check_io(void *_i)
10049 : {
10050 17 : struct spdk_bdev_channel_iter *i = _i;
10051 17 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i->i);
10052 17 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10053 17 : struct locked_lba_range_ctx *ctx = i->ctx;
10054 17 : struct lba_range *range = ctx->current_range;
10055 : struct spdk_bdev_io *bdev_io;
10056 :
10057 17 : spdk_poller_unregister(&ctx->poller);
10058 :
10059 : /* The range is now in the locked_ranges, so no new IO can be submitted to this
10060 : * range. But we need to wait until all outstanding I/O overlapping with this range
10061 : * has completed.
10062 : */
10063 18 : TAILQ_FOREACH(bdev_io, &ch->io_submitted, internal.ch_link) {
10064 3 : if (bdev_io_range_is_locked(bdev_io, range)) {
10065 2 : ctx->poller = SPDK_POLLER_REGISTER(bdev_lock_lba_range_check_io, i, 100);
10066 2 : return SPDK_POLLER_BUSY;
10067 : }
10068 1 : }
10069 :
10070 15 : spdk_bdev_for_each_channel_continue(i, 0);
10071 15 : return SPDK_POLLER_BUSY;
10072 17 : }
10073 :
10074 : static void
10075 15 : bdev_lock_lba_range_get_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
10076 : struct spdk_io_channel *_ch, void *_ctx)
10077 : {
10078 15 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10079 15 : struct locked_lba_range_ctx *ctx = _ctx;
10080 : struct lba_range *range;
10081 :
10082 16 : TAILQ_FOREACH(range, &ch->locked_ranges, tailq) {
10083 1 : if (range->length == ctx->range.length &&
10084 0 : range->offset == ctx->range.offset &&
10085 0 : range->locked_ctx == ctx->range.locked_ctx) {
10086 : /* This range already exists on this channel, so don't add
10087 : * it again. This can happen when a new channel is created
10088 : * while the for_each_channel operation is in progress.
10089 : * Do not check for outstanding I/O in that case, since the
10090 : * range was locked before any I/O could be submitted to the
10091 : * new channel.
10092 : */
10093 0 : spdk_bdev_for_each_channel_continue(i, 0);
10094 0 : return;
10095 : }
10096 1 : }
10097 :
10098 15 : range = calloc(1, sizeof(*range));
10099 15 : if (range == NULL) {
10100 0 : spdk_bdev_for_each_channel_continue(i, -ENOMEM);
10101 0 : return;
10102 : }
10103 :
10104 15 : range->length = ctx->range.length;
10105 15 : range->offset = ctx->range.offset;
10106 15 : range->locked_ctx = ctx->range.locked_ctx;
10107 15 : range->quiesce = ctx->range.quiesce;
10108 15 : ctx->current_range = range;
10109 15 : if (ctx->range.owner_ch == ch) {
10110 : /* This is the range object for the channel that will hold
10111 : * the lock. Store it in the ctx object so that we can easily
10112 : * set its owner_ch after the lock is finally acquired.
10113 : */
10114 10 : ctx->owner_range = range;
10115 10 : }
10116 15 : TAILQ_INSERT_TAIL(&ch->locked_ranges, range, tailq);
10117 15 : bdev_lock_lba_range_check_io(i);
10118 15 : }
10119 :
10120 : static void
10121 14 : bdev_lock_lba_range_ctx(struct spdk_bdev *bdev, struct locked_lba_range_ctx *ctx)
10122 : {
10123 14 : assert(spdk_get_thread() == ctx->range.owner_thread);
10124 14 : assert(ctx->range.owner_ch == NULL ||
10125 : spdk_io_channel_get_thread(ctx->range.owner_ch->channel) == ctx->range.owner_thread);
10126 :
10127 : /* We will add a copy of this range to each channel now. */
10128 14 : spdk_bdev_for_each_channel(bdev, bdev_lock_lba_range_get_channel, ctx,
10129 : bdev_lock_lba_range_cb);
10130 14 : }
10131 :
10132 : static bool
10133 17 : bdev_lba_range_overlaps_tailq(struct lba_range *range, lba_range_tailq_t *tailq)
10134 : {
10135 : struct lba_range *r;
10136 :
10137 18 : TAILQ_FOREACH(r, tailq, tailq) {
10138 4 : if (bdev_lba_range_overlapped(range, r)) {
10139 3 : return true;
10140 : }
10141 1 : }
10142 14 : return false;
10143 17 : }
10144 :
10145 : static void bdev_quiesce_range_locked(struct lba_range *range, void *ctx, int status);
10146 :
10147 : static int
10148 14 : _bdev_lock_lba_range(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch,
10149 : uint64_t offset, uint64_t length,
10150 : lock_range_cb cb_fn, void *cb_arg)
10151 : {
10152 : struct locked_lba_range_ctx *ctx;
10153 :
10154 14 : ctx = calloc(1, sizeof(*ctx));
10155 14 : if (ctx == NULL) {
10156 0 : return -ENOMEM;
10157 : }
10158 :
10159 14 : ctx->range.offset = offset;
10160 14 : ctx->range.length = length;
10161 14 : ctx->range.owner_thread = spdk_get_thread();
10162 14 : ctx->range.owner_ch = ch;
10163 14 : ctx->range.locked_ctx = cb_arg;
10164 14 : ctx->range.bdev = bdev;
10165 14 : ctx->range.quiesce = (cb_fn == bdev_quiesce_range_locked);
10166 14 : ctx->cb_fn = cb_fn;
10167 14 : ctx->cb_arg = cb_arg;
10168 :
10169 14 : spdk_spin_lock(&bdev->internal.spinlock);
10170 14 : if (bdev_lba_range_overlaps_tailq(&ctx->range, &bdev->internal.locked_ranges)) {
10171 : /* There is an active lock overlapping with this range.
10172 : * Put it on the pending list until this range no
10173 : * longer overlaps with another.
10174 : */
10175 2 : TAILQ_INSERT_TAIL(&bdev->internal.pending_locked_ranges, &ctx->range, tailq);
10176 2 : } else {
10177 12 : TAILQ_INSERT_TAIL(&bdev->internal.locked_ranges, &ctx->range, tailq);
10178 12 : bdev_lock_lba_range_ctx(bdev, ctx);
10179 : }
10180 14 : spdk_spin_unlock(&bdev->internal.spinlock);
10181 14 : return 0;
10182 14 : }
10183 :
10184 : static int
10185 10 : bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
10186 : uint64_t offset, uint64_t length,
10187 : lock_range_cb cb_fn, void *cb_arg)
10188 : {
10189 10 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
10190 10 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10191 :
10192 10 : if (cb_arg == NULL) {
10193 0 : SPDK_ERRLOG("cb_arg must not be NULL\n");
10194 0 : return -EINVAL;
10195 : }
10196 :
10197 10 : return _bdev_lock_lba_range(bdev, ch, offset, length, cb_fn, cb_arg);
10198 10 : }
10199 :
10200 : static void
10201 2 : bdev_lock_lba_range_ctx_msg(void *_ctx)
10202 : {
10203 2 : struct locked_lba_range_ctx *ctx = _ctx;
10204 :
10205 2 : bdev_lock_lba_range_ctx(ctx->range.bdev, ctx);
10206 2 : }
10207 :
10208 : static void
10209 14 : bdev_unlock_lba_range_cb(struct spdk_bdev *bdev, void *_ctx, int status)
10210 : {
10211 14 : struct locked_lba_range_ctx *ctx = _ctx;
10212 : struct locked_lba_range_ctx *pending_ctx;
10213 : struct lba_range *range, *tmp;
10214 :
10215 14 : spdk_spin_lock(&bdev->internal.spinlock);
10216 : /* Check if there are any pending locked ranges that overlap with this range
10217 : * that was just unlocked. If there are, check that each one does not overlap with any
10218 : * other locked ranges before calling bdev_lock_lba_range_ctx which will start
10219 : * the lock process.
10220 : */
10221 17 : TAILQ_FOREACH_SAFE(range, &bdev->internal.pending_locked_ranges, tailq, tmp) {
10222 3 : if (bdev_lba_range_overlapped(range, &ctx->range) &&
10223 3 : !bdev_lba_range_overlaps_tailq(range, &bdev->internal.locked_ranges)) {
10224 2 : TAILQ_REMOVE(&bdev->internal.pending_locked_ranges, range, tailq);
10225 2 : pending_ctx = SPDK_CONTAINEROF(range, struct locked_lba_range_ctx, range);
10226 2 : TAILQ_INSERT_TAIL(&bdev->internal.locked_ranges, range, tailq);
10227 4 : spdk_thread_send_msg(pending_ctx->range.owner_thread,
10228 2 : bdev_lock_lba_range_ctx_msg, pending_ctx);
10229 2 : }
10230 3 : }
10231 14 : spdk_spin_unlock(&bdev->internal.spinlock);
10232 :
10233 14 : ctx->cb_fn(&ctx->range, ctx->cb_arg, status);
10234 14 : free(ctx);
10235 14 : }
10236 :
10237 : static void
10238 16 : bdev_unlock_lba_range_get_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
10239 : struct spdk_io_channel *_ch, void *_ctx)
10240 : {
10241 16 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10242 16 : struct locked_lba_range_ctx *ctx = _ctx;
10243 : TAILQ_HEAD(, spdk_bdev_io) io_locked;
10244 : struct spdk_bdev_io *bdev_io;
10245 : struct lba_range *range;
10246 :
10247 16 : TAILQ_FOREACH(range, &ch->locked_ranges, tailq) {
10248 32 : if (ctx->range.offset == range->offset &&
10249 16 : ctx->range.length == range->length &&
10250 16 : ctx->range.locked_ctx == range->locked_ctx) {
10251 16 : TAILQ_REMOVE(&ch->locked_ranges, range, tailq);
10252 16 : free(range);
10253 16 : break;
10254 : }
10255 0 : }
10256 :
10257 : /* Note: we should almost always be able to assert that the range specified
10258 : * was found. But there are some very rare corner cases where a new channel
10259 : * gets created simultaneously with a range unlock, where this function
10260 : * would execute on that new channel and wouldn't have the range.
10261 : * We also use this to clean up range allocations when a later allocation
10262 : * fails in the locking path.
10263 : * So we can't actually assert() here.
10264 : */
10265 :
10266 : /* Swap the locked IO into a temporary list, and then try to submit them again.
10267 : * We could hyper-optimize this to only resubmit locked I/O that overlap
10268 : * with the range that was just unlocked, but this isn't a performance path so
10269 : * we go for simplicity here.
10270 : */
10271 16 : TAILQ_INIT(&io_locked);
10272 16 : TAILQ_SWAP(&ch->io_locked, &io_locked, spdk_bdev_io, internal.ch_link);
10273 19 : while (!TAILQ_EMPTY(&io_locked)) {
10274 3 : bdev_io = TAILQ_FIRST(&io_locked);
10275 3 : TAILQ_REMOVE(&io_locked, bdev_io, internal.ch_link);
10276 3 : bdev_io_submit(bdev_io);
10277 : }
10278 :
10279 16 : spdk_bdev_for_each_channel_continue(i, 0);
10280 16 : }
10281 :
10282 : static int
10283 14 : _bdev_unlock_lba_range(struct spdk_bdev *bdev, uint64_t offset, uint64_t length,
10284 : lock_range_cb cb_fn, void *cb_arg)
10285 : {
10286 : struct locked_lba_range_ctx *ctx;
10287 : struct lba_range *range;
10288 :
10289 14 : spdk_spin_lock(&bdev->internal.spinlock);
10290 : /* To start the unlock process, we find the range in the bdev's locked_ranges
10291 : * and remove it. This ensures new channels don't inherit the locked range.
10292 : * Then we will send a message to each channel to remove the range from its
10293 : * per-channel list.
10294 : */
10295 14 : TAILQ_FOREACH(range, &bdev->internal.locked_ranges, tailq) {
10296 24 : if (range->offset == offset && range->length == length &&
10297 14 : (range->owner_ch == NULL || range->locked_ctx == cb_arg)) {
10298 14 : break;
10299 : }
10300 0 : }
10301 14 : if (range == NULL) {
10302 0 : assert(false);
10303 : spdk_spin_unlock(&bdev->internal.spinlock);
10304 : return -EINVAL;
10305 : }
10306 14 : TAILQ_REMOVE(&bdev->internal.locked_ranges, range, tailq);
10307 14 : ctx = SPDK_CONTAINEROF(range, struct locked_lba_range_ctx, range);
10308 14 : spdk_spin_unlock(&bdev->internal.spinlock);
10309 :
10310 14 : ctx->cb_fn = cb_fn;
10311 14 : ctx->cb_arg = cb_arg;
10312 :
10313 14 : spdk_bdev_for_each_channel(bdev, bdev_unlock_lba_range_get_channel, ctx,
10314 : bdev_unlock_lba_range_cb);
10315 14 : return 0;
10316 : }
10317 :
10318 : static int
10319 12 : bdev_unlock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
10320 : uint64_t offset, uint64_t length,
10321 : lock_range_cb cb_fn, void *cb_arg)
10322 : {
10323 12 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
10324 12 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10325 : struct lba_range *range;
10326 12 : bool range_found = false;
10327 :
10328 : /* Let's make sure the specified channel actually has a lock on
10329 : * the specified range. Note that the range must match exactly.
10330 : */
10331 14 : TAILQ_FOREACH(range, &ch->locked_ranges, tailq) {
10332 22 : if (range->offset == offset && range->length == length &&
10333 11 : range->owner_ch == ch && range->locked_ctx == cb_arg) {
10334 10 : range_found = true;
10335 10 : break;
10336 : }
10337 2 : }
10338 :
10339 12 : if (!range_found) {
10340 2 : return -EINVAL;
10341 : }
10342 :
10343 10 : return _bdev_unlock_lba_range(bdev, offset, length, cb_fn, cb_arg);
10344 12 : }
10345 :
10346 : struct bdev_quiesce_ctx {
10347 : spdk_bdev_quiesce_cb cb_fn;
10348 : void *cb_arg;
10349 : };
10350 :
10351 : static void
10352 4 : bdev_unquiesce_range_unlocked(struct lba_range *range, void *ctx, int status)
10353 : {
10354 4 : struct bdev_quiesce_ctx *quiesce_ctx = ctx;
10355 :
10356 4 : if (quiesce_ctx->cb_fn != NULL) {
10357 4 : quiesce_ctx->cb_fn(quiesce_ctx->cb_arg, status);
10358 4 : }
10359 :
10360 4 : free(quiesce_ctx);
10361 4 : }
10362 :
10363 : static void
10364 4 : bdev_quiesce_range_locked(struct lba_range *range, void *ctx, int status)
10365 : {
10366 4 : struct bdev_quiesce_ctx *quiesce_ctx = ctx;
10367 4 : struct spdk_bdev_module *module = range->bdev->module;
10368 :
10369 4 : if (status != 0) {
10370 0 : if (quiesce_ctx->cb_fn != NULL) {
10371 0 : quiesce_ctx->cb_fn(quiesce_ctx->cb_arg, status);
10372 0 : }
10373 0 : free(quiesce_ctx);
10374 0 : return;
10375 : }
10376 :
10377 4 : spdk_spin_lock(&module->internal.spinlock);
10378 4 : TAILQ_INSERT_TAIL(&module->internal.quiesced_ranges, range, tailq_module);
10379 4 : spdk_spin_unlock(&module->internal.spinlock);
10380 :
10381 4 : if (quiesce_ctx->cb_fn != NULL) {
10382 : /* copy the context in case the range is unlocked by the callback */
10383 4 : struct bdev_quiesce_ctx tmp = *quiesce_ctx;
10384 :
10385 4 : quiesce_ctx->cb_fn = NULL;
10386 4 : quiesce_ctx->cb_arg = NULL;
10387 :
10388 4 : tmp.cb_fn(tmp.cb_arg, status);
10389 4 : }
10390 : /* quiesce_ctx will be freed on unquiesce */
10391 4 : }
10392 :
10393 : static int
10394 9 : _spdk_bdev_quiesce(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10395 : uint64_t offset, uint64_t length,
10396 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg,
10397 : bool unquiesce)
10398 : {
10399 : struct bdev_quiesce_ctx *quiesce_ctx;
10400 : int rc;
10401 :
10402 9 : if (module != bdev->module) {
10403 0 : SPDK_ERRLOG("Bdev does not belong to specified module.\n");
10404 0 : return -EINVAL;
10405 : }
10406 :
10407 9 : if (!bdev_io_valid_blocks(bdev, offset, length)) {
10408 0 : return -EINVAL;
10409 : }
10410 :
10411 9 : if (unquiesce) {
10412 : struct lba_range *range;
10413 :
10414 : /* Make sure the specified range is actually quiesced in the specified module and
10415 : * then remove it from the list. Note that the range must match exactly.
10416 : */
10417 5 : spdk_spin_lock(&module->internal.spinlock);
10418 6 : TAILQ_FOREACH(range, &module->internal.quiesced_ranges, tailq_module) {
10419 5 : if (range->bdev == bdev && range->offset == offset && range->length == length) {
10420 4 : TAILQ_REMOVE(&module->internal.quiesced_ranges, range, tailq_module);
10421 4 : break;
10422 : }
10423 1 : }
10424 5 : spdk_spin_unlock(&module->internal.spinlock);
10425 :
10426 5 : if (range == NULL) {
10427 1 : SPDK_ERRLOG("The range to unquiesce was not found.\n");
10428 1 : return -EINVAL;
10429 : }
10430 :
10431 4 : quiesce_ctx = range->locked_ctx;
10432 4 : quiesce_ctx->cb_fn = cb_fn;
10433 4 : quiesce_ctx->cb_arg = cb_arg;
10434 :
10435 4 : rc = _bdev_unlock_lba_range(bdev, offset, length, bdev_unquiesce_range_unlocked, quiesce_ctx);
10436 4 : } else {
10437 4 : quiesce_ctx = malloc(sizeof(*quiesce_ctx));
10438 4 : if (quiesce_ctx == NULL) {
10439 0 : return -ENOMEM;
10440 : }
10441 :
10442 4 : quiesce_ctx->cb_fn = cb_fn;
10443 4 : quiesce_ctx->cb_arg = cb_arg;
10444 :
10445 4 : rc = _bdev_lock_lba_range(bdev, NULL, offset, length, bdev_quiesce_range_locked, quiesce_ctx);
10446 4 : if (rc != 0) {
10447 0 : free(quiesce_ctx);
10448 0 : }
10449 : }
10450 :
10451 8 : return rc;
10452 9 : }
10453 :
10454 : int
10455 3 : spdk_bdev_quiesce(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10456 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg)
10457 : {
10458 3 : return _spdk_bdev_quiesce(bdev, module, 0, bdev->blockcnt, cb_fn, cb_arg, false);
10459 : }
10460 :
10461 : int
10462 3 : spdk_bdev_unquiesce(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10463 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg)
10464 : {
10465 3 : return _spdk_bdev_quiesce(bdev, module, 0, bdev->blockcnt, cb_fn, cb_arg, true);
10466 : }
10467 :
10468 : int
10469 1 : spdk_bdev_quiesce_range(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10470 : uint64_t offset, uint64_t length,
10471 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg)
10472 : {
10473 1 : return _spdk_bdev_quiesce(bdev, module, offset, length, cb_fn, cb_arg, false);
10474 : }
10475 :
10476 : int
10477 2 : spdk_bdev_unquiesce_range(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10478 : uint64_t offset, uint64_t length,
10479 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg)
10480 : {
10481 2 : return _spdk_bdev_quiesce(bdev, module, offset, length, cb_fn, cb_arg, true);
10482 : }
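
/*
 * Usage sketch, for illustration only: a bdev module pauses I/O to its own bdev,
 * performs maintenance once all overlapping I/O has drained, then resumes I/O.
 * The example_* names and the maintenance step are hypothetical.
 */
static void
example_unquiesce_done(void *ctx, int status)
{
	SPDK_NOTICELOG("bdev resumed, status %d\n", status);
}

static void
example_quiesce_done(void *ctx, int status)
{
	struct spdk_bdev *bdev = ctx;

	if (status != 0) {
		SPDK_ERRLOG("quiesce failed: %d\n", status);
		return;
	}

	/* No new I/O is submitted and no overlapping I/O is outstanding at this point.
	 * Do the maintenance work here, then allow I/O to flow again.
	 */
	spdk_bdev_unquiesce(bdev, bdev->module, example_unquiesce_done, NULL);
}

static void
example_quiesce_bdev(struct spdk_bdev *bdev)
{
	int rc;

	/* Only the module that registered the bdev may quiesce it. */
	rc = spdk_bdev_quiesce(bdev, bdev->module, example_quiesce_done, bdev);
	if (rc != 0) {
		SPDK_ERRLOG("failed to start quiesce: %d\n", rc);
	}
}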
10483 :
10484 : int
10485 272 : spdk_bdev_get_memory_domains(struct spdk_bdev *bdev, struct spdk_memory_domain **domains,
10486 : int array_size)
10487 : {
10488 272 : if (!bdev) {
10489 1 : return -EINVAL;
10490 : }
10491 :
10492 271 : if (bdev->fn_table->get_memory_domains) {
10493 3 : return bdev->fn_table->get_memory_domains(bdev->ctxt, domains, array_size);
10494 : }
10495 :
10496 268 : return 0;
10497 272 : }
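
/*
 * Usage sketch, for illustration only: list the memory domains a bdev can
 * consume buffers from. It assumes that calling with a NULL array returns the
 * number of available domains; the example_* name is hypothetical.
 */
static int
example_print_memory_domains(struct spdk_bdev *bdev)
{
	struct spdk_memory_domain **domains;
	int i, rc, num_domains;

	num_domains = spdk_bdev_get_memory_domains(bdev, NULL, 0);
	if (num_domains <= 0) {
		/* Negative errno on failure, 0 when the bdev uses no memory domains. */
		return num_domains;
	}

	domains = calloc(num_domains, sizeof(*domains));
	if (domains == NULL) {
		return -ENOMEM;
	}

	rc = spdk_bdev_get_memory_domains(bdev, domains, num_domains);
	for (i = 0; i < rc; i++) {
		SPDK_NOTICELOG("bdev %s uses memory domain %p\n",
			       spdk_bdev_get_name(bdev), (void *)domains[i]);
	}

	free(domains);
	return rc < 0 ? rc : 0;
}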
10498 :
10499 : struct spdk_bdev_for_each_io_ctx {
10500 : void *ctx;
10501 : spdk_bdev_io_fn fn;
10502 : spdk_bdev_for_each_io_cb cb;
10503 : };
10504 :
10505 : static void
10506 0 : bdev_channel_for_each_io(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
10507 : struct spdk_io_channel *io_ch, void *_ctx)
10508 : {
10509 0 : struct spdk_bdev_for_each_io_ctx *ctx = _ctx;
10510 0 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
10511 : struct spdk_bdev_io *bdev_io;
10512 0 : int rc = 0;
10513 :
10514 0 : TAILQ_FOREACH(bdev_io, &bdev_ch->io_submitted, internal.ch_link) {
10515 0 : rc = ctx->fn(ctx->ctx, bdev_io);
10516 0 : if (rc != 0) {
10517 0 : break;
10518 : }
10519 0 : }
10520 :
10521 0 : spdk_bdev_for_each_channel_continue(i, rc);
10522 0 : }
10523 :
10524 : static void
10525 0 : bdev_for_each_io_done(struct spdk_bdev *bdev, void *_ctx, int status)
10526 : {
10527 0 : struct spdk_bdev_for_each_io_ctx *ctx = _ctx;
10528 :
10529 0 : ctx->cb(ctx->ctx, status);
10530 :
10531 0 : free(ctx);
10532 0 : }
10533 :
10534 : void
10535 0 : spdk_bdev_for_each_bdev_io(struct spdk_bdev *bdev, void *_ctx, spdk_bdev_io_fn fn,
10536 : spdk_bdev_for_each_io_cb cb)
10537 : {
10538 : struct spdk_bdev_for_each_io_ctx *ctx;
10539 :
10540 0 : assert(fn != NULL && cb != NULL);
10541 :
10542 0 : ctx = calloc(1, sizeof(*ctx));
10543 0 : if (ctx == NULL) {
10544 0 : SPDK_ERRLOG("Failed to allocate context.\n");
10545 0 : cb(_ctx, -ENOMEM);
10546 0 : return;
10547 : }
10548 :
10549 0 : ctx->ctx = _ctx;
10550 0 : ctx->fn = fn;
10551 0 : ctx->cb = cb;
10552 :
10553 0 : spdk_bdev_for_each_channel(bdev, bdev_channel_for_each_io, ctx,
10554 : bdev_for_each_io_done);
10555 0 : }
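
/*
 * Usage sketch, for illustration only: count every I/O currently submitted to a
 * bdev across all of its channels. The context struct and example_* names are
 * hypothetical; a non-zero return from the per-I/O function is assumed to stop
 * the iteration early.
 */
struct example_io_count_ctx {
	uint32_t count;
};

static int
example_count_io(void *ctx, struct spdk_bdev_io *bdev_io)
{
	struct example_io_count_ctx *count_ctx = ctx;

	count_ctx->count++;
	return 0; /* keep iterating */
}

static void
example_count_io_done(void *ctx, int status)
{
	struct example_io_count_ctx *count_ctx = ctx;

	SPDK_NOTICELOG("found %u submitted I/Os, status %d\n", count_ctx->count, status);
	free(count_ctx);
}

static void
example_count_submitted_io(struct spdk_bdev *bdev)
{
	struct example_io_count_ctx *count_ctx;

	count_ctx = calloc(1, sizeof(*count_ctx));
	if (count_ctx == NULL) {
		return;
	}

	spdk_bdev_for_each_bdev_io(bdev, count_ctx, example_count_io, example_count_io_done);
}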
10556 :
10557 : void
10558 132 : spdk_bdev_for_each_channel_continue(struct spdk_bdev_channel_iter *iter, int status)
10559 : {
10560 132 : spdk_for_each_channel_continue(iter->i, status);
10561 132 : }
10562 :
10563 : static struct spdk_bdev *
10564 361 : io_channel_iter_get_bdev(struct spdk_io_channel_iter *i)
10565 : {
10566 361 : void *io_device = spdk_io_channel_iter_get_io_device(i);
10567 :
10568 361 : return __bdev_from_io_dev(io_device);
10569 : }
10570 :
10571 : static void
10572 132 : bdev_each_channel_msg(struct spdk_io_channel_iter *i)
10573 : {
10574 132 : struct spdk_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
10575 132 : struct spdk_bdev *bdev = io_channel_iter_get_bdev(i);
10576 132 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
10577 :
10578 132 : iter->i = i;
10579 132 : iter->fn(iter, bdev, ch, iter->ctx);
10580 132 : }
10581 :
10582 : static void
10583 229 : bdev_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
10584 : {
10585 229 : struct spdk_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
10586 229 : struct spdk_bdev *bdev = io_channel_iter_get_bdev(i);
10587 :
10588 229 : iter->i = i;
10589 229 : iter->cpl(bdev, iter->ctx, status);
10590 :
10591 229 : free(iter);
10592 229 : }
10593 :
10594 : void
10595 229 : spdk_bdev_for_each_channel(struct spdk_bdev *bdev, spdk_bdev_for_each_channel_msg fn,
10596 : void *ctx, spdk_bdev_for_each_channel_done cpl)
10597 : {
10598 : struct spdk_bdev_channel_iter *iter;
10599 :
10600 229 : assert(bdev != NULL && fn != NULL && ctx != NULL);
10601 :
10602 229 : iter = calloc(1, sizeof(struct spdk_bdev_channel_iter));
10603 229 : if (iter == NULL) {
10604 0 : SPDK_ERRLOG("Unable to allocate iterator\n");
10605 0 : assert(false);
10606 : return;
10607 : }
10608 :
10609 229 : iter->fn = fn;
10610 229 : iter->cpl = cpl;
10611 229 : iter->ctx = ctx;
10612 :
10613 458 : spdk_for_each_channel(__bdev_to_io_dev(bdev), bdev_each_channel_msg,
10614 229 : iter, bdev_each_channel_cpl);
10615 229 : }
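
/*
 * Usage sketch, for illustration only: run a callback on every I/O channel of a
 * bdev, on the thread that owns each channel, and then receive a single
 * completion. The example_* names are hypothetical; each per-channel callback
 * must eventually call spdk_bdev_for_each_channel_continue(), and the context
 * pointer must not be NULL (it is asserted above).
 */
static void
example_visit_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
		      struct spdk_io_channel *ch, void *ctx)
{
	uint32_t *visited = ctx;

	/* Executes on the thread owning 'ch'; channels are visited one at a time. */
	(*visited)++;
	spdk_bdev_for_each_channel_continue(i, 0);
}

static void
example_visit_done(struct spdk_bdev *bdev, void *ctx, int status)
{
	uint32_t *visited = ctx;

	SPDK_NOTICELOG("visited %u channels, status %d\n", *visited, status);
	free(visited);
}

static void
example_visit_all_channels(struct spdk_bdev *bdev)
{
	uint32_t *visited;

	visited = calloc(1, sizeof(*visited));
	if (visited == NULL) {
		return;
	}

	spdk_bdev_for_each_channel(bdev, example_visit_channel, visited, example_visit_done);
}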
10616 :
10617 : static void
10618 3 : bdev_copy_do_write_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
10619 : {
10620 3 : struct spdk_bdev_io *parent_io = cb_arg;
10621 :
10622 3 : spdk_bdev_free_io(bdev_io);
10623 :
10624 : /* Check return status of write */
10625 3 : parent_io->internal.status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
10626 3 : parent_io->internal.cb(parent_io, success, parent_io->internal.caller_ctx);
10627 3 : }
10628 :
10629 : static void
10630 3 : bdev_copy_do_write(void *_bdev_io)
10631 : {
10632 3 : struct spdk_bdev_io *bdev_io = _bdev_io;
10633 : int rc;
10634 :
10635 : /* Write blocks */
10636 6 : rc = spdk_bdev_write_blocks_with_md(bdev_io->internal.desc,
10637 3 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
10638 3 : bdev_io->u.bdev.iovs[0].iov_base,
10639 3 : bdev_io->u.bdev.md_buf, bdev_io->u.bdev.offset_blocks,
10640 3 : bdev_io->u.bdev.num_blocks, bdev_copy_do_write_done, bdev_io);
10641 :
10642 3 : if (rc == -ENOMEM) {
10643 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_copy_do_write);
10644 3 : } else if (rc != 0) {
10645 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
10646 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
10647 0 : }
10648 3 : }
10649 :
10650 : static void
10651 3 : bdev_copy_do_read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
10652 : {
10653 3 : struct spdk_bdev_io *parent_io = cb_arg;
10654 :
10655 3 : spdk_bdev_free_io(bdev_io);
10656 :
10657 : /* Check return status of read */
10658 3 : if (!success) {
10659 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
10660 0 : parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
10661 0 : return;
10662 : }
10663 :
10664 : /* Do write */
10665 3 : bdev_copy_do_write(parent_io);
10666 3 : }
10667 :
10668 : static void
10669 3 : bdev_copy_do_read(void *_bdev_io)
10670 : {
10671 3 : struct spdk_bdev_io *bdev_io = _bdev_io;
10672 : int rc;
10673 :
10674 : /* Read blocks */
10675 6 : rc = spdk_bdev_read_blocks_with_md(bdev_io->internal.desc,
10676 3 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
10677 3 : bdev_io->u.bdev.iovs[0].iov_base,
10678 3 : bdev_io->u.bdev.md_buf, bdev_io->u.bdev.copy.src_offset_blocks,
10679 3 : bdev_io->u.bdev.num_blocks, bdev_copy_do_read_done, bdev_io);
10680 :
10681 3 : if (rc == -ENOMEM) {
10682 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_copy_do_read);
10683 3 : } else if (rc != 0) {
10684 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
10685 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
10686 0 : }
10687 3 : }
10688 :
10689 : static void
10690 3 : bdev_copy_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
10691 : {
10692 3 : if (!success) {
10693 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
10694 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
10695 0 : return;
10696 : }
10697 :
10698 3 : bdev_copy_do_read(bdev_io);
10699 3 : }
10700 :
10701 : int
10702 27 : spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
10703 : uint64_t dst_offset_blocks, uint64_t src_offset_blocks, uint64_t num_blocks,
10704 : spdk_bdev_io_completion_cb cb, void *cb_arg)
10705 : {
10706 27 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
10707 : struct spdk_bdev_io *bdev_io;
10708 27 : struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
10709 :
10710 27 : if (!desc->write) {
10711 0 : return -EBADF;
10712 : }
10713 :
10714 27 : if (!bdev_io_valid_blocks(bdev, dst_offset_blocks, num_blocks) ||
10715 27 : !bdev_io_valid_blocks(bdev, src_offset_blocks, num_blocks)) {
10716 0 : SPDK_DEBUGLOG(bdev,
10717 : "Invalid offset or number of blocks: dst %lu, src %lu, count %lu\n",
10718 : dst_offset_blocks, src_offset_blocks, num_blocks);
10719 0 : return -EINVAL;
10720 : }
10721 :
10722 27 : bdev_io = bdev_channel_get_io(channel);
10723 27 : if (!bdev_io) {
10724 0 : return -ENOMEM;
10725 : }
10726 :
10727 27 : bdev_io->internal.ch = channel;
10728 27 : bdev_io->internal.desc = desc;
10729 27 : bdev_io->type = SPDK_BDEV_IO_TYPE_COPY;
10730 :
10731 27 : bdev_io->u.bdev.offset_blocks = dst_offset_blocks;
10732 27 : bdev_io->u.bdev.copy.src_offset_blocks = src_offset_blocks;
10733 27 : bdev_io->u.bdev.num_blocks = num_blocks;
10734 27 : bdev_io->u.bdev.memory_domain = NULL;
10735 27 : bdev_io->u.bdev.memory_domain_ctx = NULL;
10736 27 : bdev_io->u.bdev.iovs = NULL;
10737 27 : bdev_io->u.bdev.iovcnt = 0;
10738 27 : bdev_io->u.bdev.md_buf = NULL;
10739 27 : bdev_io->u.bdev.accel_sequence = NULL;
10740 27 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
10741 :
10742 27 : if (dst_offset_blocks == src_offset_blocks || num_blocks == 0) {
10743 0 : spdk_thread_send_msg(spdk_get_thread(), bdev_io_complete_cb, bdev_io);
10744 0 : return 0;
10745 : }
10746 :
10747 :
10748 : /* If the copy size is large and should be split, use the generic split logic
10749 : * regardless of whether SPDK_BDEV_IO_TYPE_COPY is supported or not.
10750 : *
10751 : * Then, send the copy request if SPDK_BDEV_IO_TYPE_COPY is supported or
10752 : * emulate it using regular read and write requests otherwise.
10753 : */
10754 27 : if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY) ||
10755 4 : bdev_io->internal.f.split) {
10756 24 : bdev_io_submit(bdev_io);
10757 24 : return 0;
10758 : }
10759 :
10760 3 : spdk_bdev_io_get_buf(bdev_io, bdev_copy_get_buf_cb, num_blocks * spdk_bdev_get_block_size(bdev));
10761 :
10762 3 : return 0;
10763 27 : }
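
/*
 * Usage sketch, for illustration only: copy a small range of blocks within one
 * bdev and release the I/O from the completion callback. The offsets, block
 * count and example_* names are hypothetical.
 */
static void
example_copy_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	SPDK_NOTICELOG("copy %s\n", success ? "succeeded" : "failed");
	spdk_bdev_free_io(bdev_io);
}

static void
example_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch)
{
	int rc;

	/* Copy 8 blocks starting at block 0 onto the range starting at block 1024. */
	rc = spdk_bdev_copy_blocks(desc, ch, 1024, 0, 8, example_copy_done, NULL);
	if (rc == -ENOMEM) {
		/* No spdk_bdev_io was available; a caller could retry later, e.g. via
		 * spdk_bdev_queue_io_wait().
		 */
	} else if (rc != 0) {
		SPDK_ERRLOG("failed to submit copy: %d\n", rc);
	}
}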
10764 :
10765 3 : SPDK_LOG_REGISTER_COMPONENT(bdev)
10766 :
10767 : static void
10768 0 : bdev_trace(void)
10769 : {
10770 0 : struct spdk_trace_tpoint_opts opts[] = {
10771 : {
10772 : "BDEV_IO_START", TRACE_BDEV_IO_START,
10773 : OWNER_TYPE_BDEV, OBJECT_BDEV_IO, 1,
10774 : {
10775 : { "type", SPDK_TRACE_ARG_TYPE_INT, 8 },
10776 : { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
10777 : { "offset", SPDK_TRACE_ARG_TYPE_INT, 8 },
10778 : { "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
10779 : }
10780 : },
10781 : {
10782 : "BDEV_IO_DONE", TRACE_BDEV_IO_DONE,
10783 : OWNER_TYPE_BDEV, OBJECT_BDEV_IO, 0,
10784 : {
10785 : { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
10786 : { "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
10787 : }
10788 : },
10789 : {
10790 : "BDEV_IOCH_CREATE", TRACE_BDEV_IOCH_CREATE,
10791 : OWNER_TYPE_BDEV, OBJECT_NONE, 0,
10792 : {
10793 : { "tid", SPDK_TRACE_ARG_TYPE_INT, 8 }
10794 : }
10795 : },
10796 : {
10797 : "BDEV_IOCH_DESTROY", TRACE_BDEV_IOCH_DESTROY,
10798 : OWNER_TYPE_BDEV, OBJECT_NONE, 0,
10799 : {
10800 : { "tid", SPDK_TRACE_ARG_TYPE_INT, 8 }
10801 : }
10802 : },
10803 : };
10804 :
10805 :
10806 0 : spdk_trace_register_owner_type(OWNER_TYPE_BDEV, 'b');
10807 0 : spdk_trace_register_object(OBJECT_BDEV_IO, 'i');
10808 0 : spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
10809 0 : spdk_trace_tpoint_register_relation(TRACE_BDEV_NVME_IO_START, OBJECT_BDEV_IO, 0);
10810 0 : spdk_trace_tpoint_register_relation(TRACE_BDEV_NVME_IO_DONE, OBJECT_BDEV_IO, 0);
10811 0 : spdk_trace_tpoint_register_relation(TRACE_BLOB_REQ_SET_START, OBJECT_BDEV_IO, 0);
10812 0 : spdk_trace_tpoint_register_relation(TRACE_BLOB_REQ_SET_COMPLETE, OBJECT_BDEV_IO, 0);
10813 0 : spdk_trace_tpoint_register_relation(TRACE_BDEV_RAID_IO_START, OBJECT_BDEV_IO, 0);
10814 0 : spdk_trace_tpoint_register_relation(TRACE_BDEV_RAID_IO_DONE, OBJECT_BDEV_IO, 0);
10815 0 : }
10816 3 : SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV)