Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2022 Intel Corporation.
3 : * Copyright (c) Samsung Electronics Co., Ltd.
4 : * All rights reserved.
5 : */
6 :
7 : #include "libxnvme.h"
8 :
9 : #include "bdev_xnvme.h"
10 :
11 : #include "spdk/stdinc.h"
12 :
13 : #include "spdk/barrier.h"
14 : #include "spdk/bdev.h"
15 : #include "spdk/env.h"
16 : #include "spdk/fd.h"
17 : #include "spdk/likely.h"
18 : #include "spdk/thread.h"
19 : #include "spdk/json.h"
20 : #include "spdk/util.h"
21 : #include "spdk/string.h"
22 :
23 : #include "spdk/log.h"
24 :
/* Per-channel state; lives in the context buffer of each I/O channel of an xNVMe bdev. */
struct bdev_xnvme_io_channel {
	/* xNVMe queue set up by bdev_xnvme_queue_create_cb() for this channel. */
	struct xnvme_queue	*queue;
	/* Poller that runs bdev_xnvme_poll() against the queue above. */
	struct spdk_poller	*poller;
};
29 :
/* Per-I/O driver context; bdev_xnvme_get_ctx_size() reports its size to the bdev layer. */
struct bdev_xnvme_task {
	/* Channel the I/O was submitted on; written by _xnvme_submit_request(). */
	struct bdev_xnvme_io_channel	*ch;
	/* List linkage; not referenced by the code visible in this file. */
	TAILQ_ENTRY(bdev_xnvme_task)	link;
};
34 :
35 : struct bdev_xnvme {
36 : struct spdk_bdev bdev;
37 : char *filename;
38 : char *io_mechanism;
39 : struct xnvme_dev *dev;
40 : uint32_t nsid;
41 : bool conserve_cpu;
42 :
43 : TAILQ_ENTRY(bdev_xnvme) link;
44 : };
45 :
/* Forward declarations for functions that are referenced before they are defined. */
static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);

/* Every bdev successfully registered by create_xnvme_bdev(); walked by bdev_xnvme_config_json(). */
static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);
50 :
51 : static int
52 0 : bdev_xnvme_get_ctx_size(void)
53 : {
54 0 : return sizeof(struct bdev_xnvme_task);
55 : }
56 :
57 : static int
58 0 : bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
59 : {
60 0 : struct bdev_xnvme *xnvme;
61 :
62 0 : TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
63 0 : spdk_json_write_object_begin(w);
64 :
65 0 : spdk_json_write_named_string(w, "method", "bdev_xnvme_create");
66 :
67 0 : spdk_json_write_named_object_begin(w, "params");
68 0 : spdk_json_write_named_string(w, "name", xnvme->bdev.name);
69 0 : spdk_json_write_named_string(w, "filename", xnvme->filename);
70 0 : spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
71 0 : spdk_json_write_named_bool(w, "conserve_cpu", xnvme->conserve_cpu);
72 0 : spdk_json_write_object_end(w);
73 :
74 0 : spdk_json_write_object_end(w);
75 0 : }
76 :
77 0 : return 0;
78 0 : }
79 :
80 : static struct spdk_bdev_module xnvme_if = {
81 : .name = "xnvme",
82 : .module_init = bdev_xnvme_init,
83 : .module_fini = bdev_xnvme_fini,
84 : .get_ctx_size = bdev_xnvme_get_ctx_size,
85 : .config_json = bdev_xnvme_config_json,
86 : };
87 :
88 0 : SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)
89 :
/*
 * fn_table callback: obtain an I/O channel for the bdev.
 * `ctx` is the struct bdev_xnvme that was registered as the io_device.
 */
static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *io_device = ctx;

	return spdk_get_io_channel(io_device);
}
97 :
98 : static bool
99 0 : bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
100 : {
101 0 : switch (io_type) {
102 : case SPDK_BDEV_IO_TYPE_READ:
103 : case SPDK_BDEV_IO_TYPE_WRITE:
104 0 : return true;
105 : default:
106 0 : return false;
107 : }
108 0 : }
109 :
110 : static void
111 0 : bdev_xnvme_destruct_cb(void *io_device)
112 : {
113 0 : struct bdev_xnvme *xnvme = io_device;
114 :
115 0 : TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
116 0 : bdev_xnvme_free(xnvme);
117 0 : }
118 :
119 : static int
120 0 : bdev_xnvme_destruct(void *ctx)
121 : {
122 0 : struct bdev_xnvme *xnvme = ctx;
123 :
124 0 : spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);
125 :
126 0 : return 0;
127 0 : }
128 :
129 : static void
130 0 : _xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
131 : {
132 0 : struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
133 0 : struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
134 0 : struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
135 0 : struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
136 0 : int err;
137 :
138 0 : SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
139 : bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);
140 :
141 0 : switch (bdev_io->type) {
142 : case SPDK_BDEV_IO_TYPE_READ:
143 0 : ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
144 0 : ctx->cmd.common.nsid = xnvme->nsid;
145 0 : ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
146 0 : ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
147 0 : break;
148 : case SPDK_BDEV_IO_TYPE_WRITE:
149 0 : ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
150 0 : ctx->cmd.common.nsid = xnvme->nsid;
151 0 : ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
152 0 : ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
153 0 : break;
154 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
155 0 : ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE_ZEROES;
156 0 : ctx->cmd.common.nsid = xnvme->nsid;
157 0 : ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
158 0 : ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
159 0 : break;
160 : default:
161 0 : SPDK_ERRLOG("Wrong io type\n");
162 :
163 0 : xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
164 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
165 0 : return;
166 : }
167 :
168 0 : xnvme_task->ch = xnvme_ch;
169 0 : ctx->async.cb_arg = xnvme_task;
170 :
171 0 : err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
172 0 : bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);
173 :
174 0 : switch (err) {
175 : /* Submission success! */
176 : case 0:
177 0 : SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt:%d, nblks: %lu off: %#lx\n",
178 : xnvme_ch, bdev_io->u.bdev.iovcnt,
179 : bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
180 0 : return;
181 :
182 : /* Submission failed: queue is full or no memory => Queue the I/O in bdev layer */
183 : case -EBUSY:
184 : case -EAGAIN:
185 : case -ENOMEM:
186 0 : SPDK_WARNLOG("Start to queue I/O for xnvme bdev\n");
187 :
188 0 : xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
189 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
190 0 : return;
191 :
192 : /* Submission failed: unexpected error, put the command-context back in the queue */
193 : default:
194 0 : SPDK_ERRLOG("bdev_xnvme_cmd_passv : Submission failed: unexpected error\n");
195 :
196 0 : xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
197 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
198 0 : return;
199 : }
200 0 : }
201 :
202 : static void
203 0 : bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
204 : {
205 0 : struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
206 :
207 0 : if (!success) {
208 0 : xnvme_queue_put_cmd_ctx(xnvme_ch->queue, xnvme_queue_get_cmd_ctx(xnvme_ch->queue));
209 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
210 0 : return;
211 : }
212 :
213 0 : _xnvme_submit_request(ch, bdev_io);
214 0 : }
215 :
216 : static void
217 0 : bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
218 : {
219 0 : switch (bdev_io->type) {
220 : /* Read and write operations must be performed on buffers aligned to
221 : * bdev->required_alignment. If user specified unaligned buffers,
222 : * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
223 : case SPDK_BDEV_IO_TYPE_READ:
224 : case SPDK_BDEV_IO_TYPE_WRITE:
225 0 : spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
226 0 : bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
227 0 : break;
228 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
229 0 : _xnvme_submit_request(ch, bdev_io);
230 0 : break;
231 :
232 : default:
233 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
234 0 : break;
235 : }
236 0 : }
237 :
238 : static const struct spdk_bdev_fn_table xnvme_fn_table = {
239 : .destruct = bdev_xnvme_destruct,
240 : .submit_request = bdev_xnvme_submit_request,
241 : .io_type_supported = bdev_xnvme_io_type_supported,
242 : .get_io_channel = bdev_xnvme_get_io_channel,
243 : };
244 :
245 : static void
246 0 : bdev_xnvme_free(struct bdev_xnvme *xnvme)
247 : {
248 0 : assert(xnvme != NULL);
249 :
250 0 : xnvme_dev_close(xnvme->dev);
251 0 : free(xnvme->io_mechanism);
252 0 : free(xnvme->filename);
253 0 : free(xnvme->bdev.name);
254 0 : free(xnvme);
255 0 : }
256 :
257 : static void
258 0 : bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
259 : {
260 0 : struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
261 0 : enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;
262 :
263 0 : SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);
264 :
265 0 : if (xnvme_cmd_ctx_cpl_status(ctx)) {
266 0 : SPDK_ERRLOG("xNVMe I/O Failed\n");
267 0 : xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
268 0 : status = SPDK_BDEV_IO_STATUS_FAILED;
269 0 : }
270 :
271 0 : spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);
272 :
273 : /* Completed: Put the command- context back in the queue */
274 0 : xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
275 0 : }
276 :
277 : static int
278 0 : bdev_xnvme_poll(void *arg)
279 : {
280 0 : struct bdev_xnvme_io_channel *ch = arg;
281 0 : int rc;
282 :
283 0 : rc = xnvme_queue_poke(ch->queue, 0);
284 0 : if (rc < 0) {
285 0 : SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
286 0 : return SPDK_POLLER_BUSY;
287 : }
288 :
289 0 : return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
290 0 : }
291 :
292 : static int
293 0 : bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
294 : {
295 0 : struct bdev_xnvme *xnvme = io_device;
296 0 : struct bdev_xnvme_io_channel *ch = ctx_buf;
297 0 : int rc;
298 0 : int qd = 512;
299 :
300 0 : rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
301 0 : if (rc) {
302 0 : SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
303 0 : return 1;
304 : }
305 :
306 0 : xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);
307 :
308 0 : ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);
309 :
310 0 : return 0;
311 0 : }
312 :
313 : static void
314 0 : bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
315 : {
316 0 : struct bdev_xnvme_io_channel *ch = ctx_buf;
317 :
318 0 : spdk_poller_unregister(&ch->poller);
319 :
320 0 : xnvme_queue_term(ch->queue);
321 0 : }
322 :
323 : struct spdk_bdev *
324 0 : create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism,
325 : bool conserve_cpu)
326 : {
327 0 : struct bdev_xnvme *xnvme;
328 0 : uint32_t block_size;
329 0 : uint64_t bdev_size;
330 0 : int rc;
331 0 : struct xnvme_opts opts = xnvme_opts_default();
332 :
333 0 : xnvme = calloc(1, sizeof(*xnvme));
334 0 : if (!xnvme) {
335 0 : SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
336 0 : return NULL;
337 : }
338 :
339 0 : opts.direct = 1;
340 0 : opts.async = io_mechanism;
341 0 : if (!opts.async) {
342 0 : goto error_return;
343 : }
344 0 : xnvme->io_mechanism = strdup(io_mechanism);
345 0 : if (!xnvme->io_mechanism) {
346 0 : goto error_return;
347 : }
348 :
349 0 : if (!conserve_cpu) {
350 0 : if (!strcmp(xnvme->io_mechanism, "libaio")) {
351 0 : opts.poll_io = 1;
352 0 : } else if (!strcmp(xnvme->io_mechanism, "io_uring")) {
353 0 : opts.poll_io = 1;
354 0 : } else if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
355 0 : opts.poll_io = 1;
356 0 : }
357 0 : }
358 :
359 0 : xnvme->filename = strdup(filename);
360 0 : if (!xnvme->filename) {
361 0 : goto error_return;
362 : }
363 :
364 0 : xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
365 0 : if (!xnvme->dev) {
366 0 : SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
367 0 : goto error_return;
368 : }
369 :
370 0 : xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);
371 :
372 0 : bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
373 0 : block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;
374 :
375 0 : xnvme->bdev.name = strdup(name);
376 0 : if (!xnvme->bdev.name) {
377 0 : goto error_return;
378 : }
379 :
380 0 : xnvme->bdev.product_name = "xNVMe bdev";
381 0 : xnvme->bdev.module = &xnvme_if;
382 :
383 0 : xnvme->bdev.write_cache = 0;
384 0 : xnvme->bdev.max_write_zeroes = UINT16_MAX + 1;
385 :
386 0 : if (block_size == 0) {
387 0 : SPDK_ERRLOG("Block size could not be auto-detected\n");
388 0 : goto error_return;
389 : }
390 :
391 0 : if (block_size < 512) {
392 0 : SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
393 0 : goto error_return;
394 : }
395 :
396 0 : if (!spdk_u32_is_pow2(block_size)) {
397 0 : SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
398 0 : goto error_return;
399 : }
400 :
401 0 : SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %lu, block_size : %d\n",
402 : xnvme->bdev.name, bdev_size, block_size);
403 :
404 0 : xnvme->bdev.blocklen = block_size;
405 0 : xnvme->bdev.required_alignment = spdk_u32log2(block_size);
406 :
407 0 : if (bdev_size % xnvme->bdev.blocklen != 0) {
408 0 : SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
409 : bdev_size, xnvme->bdev.blocklen);
410 0 : goto error_return;
411 : }
412 :
413 0 : xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
414 0 : xnvme->bdev.ctxt = xnvme;
415 :
416 0 : xnvme->bdev.fn_table = &xnvme_fn_table;
417 :
418 0 : spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
419 : sizeof(struct bdev_xnvme_io_channel),
420 0 : xnvme->bdev.name);
421 0 : rc = spdk_bdev_register(&xnvme->bdev);
422 0 : if (rc) {
423 0 : spdk_io_device_unregister(xnvme, NULL);
424 0 : goto error_return;
425 : }
426 :
427 0 : TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);
428 :
429 0 : return &xnvme->bdev;
430 :
431 : error_return:
432 0 : bdev_xnvme_free(xnvme);
433 0 : return NULL;
434 0 : }
435 :
436 : void
437 0 : delete_xnvme_bdev(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
438 : {
439 0 : int rc;
440 :
441 0 : rc = spdk_bdev_unregister_by_name(name, &xnvme_if, cb_fn, cb_arg);
442 0 : if (rc != 0) {
443 0 : cb_fn(cb_arg, rc);
444 0 : }
445 0 : }
446 :
/* Channel-create callback for the module-level io_device; nothing to set up. Returns 0. */
static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	(void)io_device;
	(void)ctx_buf;

	return 0;
}
452 :
/* Channel-destroy callback for the module-level io_device; nothing to tear down. */
static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
	(void)io_device;
	(void)ctx_buf;
}
457 :
458 : static int
459 0 : bdev_xnvme_init(void)
460 : {
461 0 : spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
462 : 0, "xnvme_module");
463 :
464 0 : return 0;
465 : }
466 :
467 : static void
468 0 : bdev_xnvme_fini(void)
469 : {
470 0 : spdk_io_device_unregister(&xnvme_if, NULL);
471 0 : }
472 :
473 0 : SPDK_LOG_REGISTER_COMPONENT(xnvme)
|