Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (c) croit GmbH.
3 : * All rights reserved.
4 : * Copyright (c) 2023 Enakta Labs
5 : * All rights reserved.
6 : */
7 :
8 : #include <sys/queue.h>
9 :
10 : #include "spdk/bdev.h"
11 : #include "spdk/bdev_module.h"
12 : #include "spdk/endian.h"
13 : #include "spdk/env.h"
14 : #include "spdk/json.h"
15 : #include "spdk/thread.h"
16 : #include "spdk/queue.h"
17 : #include "spdk/string.h"
18 : #include "spdk/stdinc.h"
19 : #include "spdk/log.h"
20 :
21 : #include <daos.h>
22 : #include <daos_event.h>
23 : #include <daos_fs.h>
24 : #include <daos_types.h>
25 : #include <daos_pool.h>
26 : #include <daos_cont.h>
27 : #include <daos_errno.h>
28 :
29 : #include "bdev_daos.h"
30 :
31 : struct bdev_daos_task {
32 : daos_event_t ev;
33 : struct spdk_thread *submit_td;
34 : struct spdk_bdev_io *bdev_io;
35 :
36 : int io_status;
37 : uint64_t offset;
38 :
39 : /* DAOS version of iovec and scatter/gather */
40 : daos_size_t read_size;
41 : d_iov_t diovs[SPDK_BDEV_IO_NUM_CHILD_IOV];
42 : d_sg_list_t sgl;
43 : };
44 :
45 : struct bdev_daos {
46 : struct spdk_bdev disk;
47 : daos_oclass_id_t oclass;
48 :
49 : char pool_name[DAOS_PROP_MAX_LABEL_BUF_LEN];
50 : char cont_name[DAOS_PROP_MAX_LABEL_BUF_LEN];
51 :
52 : struct bdev_daos_task *reset_task;
53 : struct spdk_poller *reset_retry_timer;
54 : };
55 :
56 : struct bdev_daos_io_channel {
57 : struct bdev_daos *disk;
58 : struct spdk_poller *poller;
59 :
60 : daos_handle_t pool;
61 : daos_handle_t cont;
62 :
63 : dfs_t *dfs;
64 : dfs_obj_t *obj;
65 : daos_handle_t queue;
66 : };
67 :
/* Reference count of DAOS engine users, guarded by g_bdev_daos_init_mutex */
static uint32_t g_bdev_daos_init_count = 0;
static pthread_mutex_t g_bdev_daos_init_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Module init hook, defined at the bottom of the file */
static int bdev_daos_initialize(void);

/* Take / drop a reference on the process-wide DAOS engine */
static int bdev_daos_get_engine(void);
static int bdev_daos_put_engine(void);
75 :
76 : static int
77 0 : bdev_daos_get_ctx_size(void)
78 : {
79 0 : return sizeof(struct bdev_daos_task);
80 : }
81 :
82 : static struct spdk_bdev_module daos_if = {
83 : .name = "daos",
84 : .module_init = bdev_daos_initialize,
85 : .get_ctx_size = bdev_daos_get_ctx_size,
86 : };
87 :
88 0 : SPDK_BDEV_MODULE_REGISTER(daos, &daos_if)
89 :
90 :
91 : /* Convert DAOS errors to closest POSIX errno
92 : * This is pretty much copy of daos_der2errno()
93 : * from https://github.com/daos-stack/daos/blob/master/src/include/daos/common.h
94 : * but unfortunately it's not exported in DAOS packages
95 : */
96 : static inline int
97 0 : daos2posix_errno(int err)
98 : {
99 0 : if (err > 0) {
100 0 : return EINVAL;
101 : }
102 :
103 0 : switch (err) {
104 0 : case -DER_SUCCESS:
105 0 : return 0;
106 0 : case -DER_NO_PERM:
107 : case -DER_EP_RO:
108 : case -DER_EP_OLD:
109 0 : return EPERM;
110 0 : case -DER_ENOENT:
111 : case -DER_NONEXIST:
112 0 : return ENOENT;
113 0 : case -DER_INVAL:
114 : case -DER_NOTYPE:
115 : case -DER_NOSCHEMA:
116 : case -DER_NOLOCAL:
117 : case -DER_NO_HDL:
118 : case -DER_IO_INVAL:
119 0 : return EINVAL;
120 0 : case -DER_KEY2BIG:
121 : case -DER_REC2BIG:
122 0 : return E2BIG;
123 0 : case -DER_EXIST:
124 0 : return EEXIST;
125 0 : case -DER_UNREACH:
126 0 : return EHOSTUNREACH;
127 0 : case -DER_NOSPACE:
128 0 : return ENOSPC;
129 0 : case -DER_ALREADY:
130 0 : return EALREADY;
131 0 : case -DER_NOMEM:
132 0 : return ENOMEM;
133 0 : case -DER_TIMEDOUT:
134 0 : return ETIMEDOUT;
135 0 : case -DER_BUSY:
136 : case -DER_EQ_BUSY:
137 0 : return EBUSY;
138 0 : case -DER_AGAIN:
139 0 : return EAGAIN;
140 0 : case -DER_PROTO:
141 0 : return EPROTO;
142 0 : case -DER_IO:
143 0 : return EIO;
144 0 : case -DER_CANCELED:
145 : case DER_OP_CANCELED:
146 0 : return ECANCELED;
147 0 : case -DER_OVERFLOW:
148 0 : return EOVERFLOW;
149 0 : case -DER_BADPATH:
150 : case -DER_NOTDIR:
151 0 : return ENOTDIR;
152 0 : case -DER_STALE:
153 0 : return ESTALE;
154 0 : case -DER_TX_RESTART:
155 0 : return ERESTART;
156 0 : default:
157 0 : return EIO;
158 : }
159 : };
160 :
161 : static void
162 0 : bdev_daos_free(struct bdev_daos *bdev_daos)
163 : {
164 0 : if (!bdev_daos) {
165 0 : return;
166 : }
167 :
168 0 : free(bdev_daos->disk.name);
169 0 : free(bdev_daos);
170 : }
171 :
172 : static void
173 0 : bdev_daos_destruct_cb(void *io_device)
174 : {
175 : int rc;
176 0 : struct bdev_daos *daos = io_device;
177 :
178 0 : assert(daos != NULL);
179 :
180 0 : bdev_daos_free(daos);
181 :
182 0 : rc = bdev_daos_put_engine();
183 0 : if (rc) {
184 0 : SPDK_ERRLOG("could not de-initialize DAOS engine: " DF_RC "\n", DP_RC(rc));
185 : }
186 0 : }
187 :
188 : static int
189 0 : bdev_daos_destruct(void *ctx)
190 : {
191 0 : struct bdev_daos *daos = ctx;
192 :
193 0 : SPDK_NOTICELOG("%s: destroying bdev_daos device\n", daos->disk.name);
194 :
195 0 : spdk_io_device_unregister(daos, bdev_daos_destruct_cb);
196 :
197 0 : return 0;
198 : }
199 :
200 : static void
201 0 : _bdev_daos_io_complete(void *bdev_daos_task)
202 : {
203 0 : struct bdev_daos_task *task = bdev_daos_task;
204 :
205 0 : SPDK_DEBUGLOG(bdev_daos, "completed IO at %#lx with status %s (errno=%d)\n",
206 : task->offset, task->io_status ? "FAILURE" : "SUCCESS", task->io_status);
207 :
208 0 : if (task->io_status == 0) {
209 0 : spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
210 : } else {
211 0 : spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(task), task->io_status);
212 : }
213 0 : }
214 :
215 : static void
216 0 : bdev_daos_io_complete(struct spdk_bdev_io *bdev_io, int io_status)
217 : {
218 0 : struct bdev_daos_task *task = (struct bdev_daos_task *)bdev_io->driver_ctx;
219 0 : struct spdk_thread *current_thread = spdk_get_thread();
220 :
221 0 : assert(task->submit_td != NULL);
222 :
223 0 : task->io_status = io_status;
224 0 : if (task->submit_td != current_thread) {
225 0 : spdk_thread_send_msg(task->submit_td, _bdev_daos_io_complete, task);
226 : } else {
227 0 : _bdev_daos_io_complete(task);
228 : }
229 0 : }
230 :
231 : static int64_t
232 0 : bdev_daos_writev(struct bdev_daos *daos, struct bdev_daos_io_channel *ch,
233 : struct bdev_daos_task *task,
234 : struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
235 : {
236 : int rc;
237 :
238 0 : SPDK_DEBUGLOG(bdev_daos, "write %d iovs size %lu to off: %#lx\n",
239 : iovcnt, nbytes, offset);
240 :
241 0 : assert(ch != NULL);
242 0 : assert(daos != NULL);
243 0 : assert(task != NULL);
244 0 : assert(iov != NULL);
245 :
246 0 : if (iovcnt > SPDK_BDEV_IO_NUM_CHILD_IOV) {
247 0 : SPDK_ERRLOG("iovs number [%d] exceeds max allowed limit [%d]\n", iovcnt,
248 : SPDK_BDEV_IO_NUM_CHILD_IOV);
249 0 : return -E2BIG;
250 : }
251 :
252 0 : if ((rc = daos_event_init(&task->ev, ch->queue, NULL))) {
253 0 : SPDK_ERRLOG("%s: could not initialize async event: " DF_RC "\n",
254 : daos->disk.name, DP_RC(rc));
255 0 : return -daos2posix_errno(rc);
256 : }
257 :
258 0 : for (int i = 0; i < iovcnt; i++, iov++) {
259 0 : d_iov_set(&(task->diovs[i]), iov->iov_base, iov->iov_len);
260 : }
261 :
262 0 : task->sgl.sg_nr = iovcnt;
263 0 : task->sgl.sg_nr_out = 0;
264 0 : task->sgl.sg_iovs = task->diovs;
265 0 : task->offset = offset;
266 :
267 0 : if ((rc = dfs_write(ch->dfs, ch->obj, &task->sgl, offset, &task->ev))) {
268 0 : SPDK_ERRLOG("%s: could not start async write: %s\n",
269 : daos->disk.name, strerror(rc));
270 0 : daos_event_fini(&task->ev);
271 0 : return -rc;
272 : }
273 :
274 0 : return nbytes;
275 : }
276 :
277 : static int64_t
278 0 : bdev_daos_readv(struct bdev_daos *daos, struct bdev_daos_io_channel *ch,
279 : struct bdev_daos_task *task,
280 : struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
281 : {
282 : int rc;
283 :
284 0 : SPDK_DEBUGLOG(bdev_daos, "read %d iovs size %lu to off: %#lx\n",
285 : iovcnt, nbytes, offset);
286 :
287 0 : assert(ch != NULL);
288 0 : assert(daos != NULL);
289 0 : assert(task != NULL);
290 0 : assert(iov != NULL);
291 :
292 0 : if (iovcnt > SPDK_BDEV_IO_NUM_CHILD_IOV) {
293 0 : SPDK_ERRLOG("iovs number [%d] exceeds max allowed limit [%d]\n", iovcnt,
294 : SPDK_BDEV_IO_NUM_CHILD_IOV);
295 0 : return -E2BIG;
296 : }
297 :
298 0 : if ((rc = daos_event_init(&task->ev, ch->queue, NULL))) {
299 0 : SPDK_ERRLOG("%s: could not initialize async event: " DF_RC "\n",
300 : daos->disk.name, DP_RC(rc));
301 0 : return -daos2posix_errno(rc);
302 : }
303 :
304 0 : for (int i = 0; i < iovcnt; i++, iov++) {
305 0 : d_iov_set(&(task->diovs[i]), iov->iov_base, iov->iov_len);
306 : }
307 :
308 0 : task->sgl.sg_nr = iovcnt;
309 0 : task->sgl.sg_nr_out = 0;
310 0 : task->sgl.sg_iovs = task->diovs;
311 0 : task->offset = offset;
312 :
313 0 : if ((rc = dfs_read(ch->dfs, ch->obj, &task->sgl, offset, &task->read_size, &task->ev))) {
314 0 : SPDK_ERRLOG("%s: could not start async read: %s\n",
315 : daos->disk.name, strerror(rc));
316 0 : daos_event_fini(&task->ev);
317 0 : return -rc;
318 : }
319 :
320 0 : return nbytes;
321 : }
322 :
323 : static void
324 0 : bdev_daos_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
325 : bool success)
326 : {
327 : int64_t rc;
328 0 : struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch);
329 :
330 0 : if (!success) {
331 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
332 0 : return;
333 : }
334 :
335 0 : rc = bdev_daos_readv((struct bdev_daos *)bdev_io->bdev->ctxt,
336 : dch,
337 0 : (struct bdev_daos_task *)bdev_io->driver_ctx,
338 : bdev_io->u.bdev.iovs,
339 : bdev_io->u.bdev.iovcnt,
340 0 : bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
341 0 : bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
342 :
343 0 : if (rc < 0) {
344 0 : spdk_bdev_io_complete_aio_status(bdev_io, rc);
345 0 : return;
346 : }
347 : }
348 :
349 : static void
350 0 : _bdev_daos_get_io_inflight(struct spdk_io_channel_iter *i)
351 : {
352 0 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
353 0 : struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch);
354 0 : int io_inflight = daos_eq_query(dch->queue, DAOS_EQR_WAITING, 0, NULL);
355 :
356 0 : if (io_inflight > 0) {
357 0 : spdk_for_each_channel_continue(i, -1);
358 0 : return;
359 : }
360 :
361 0 : spdk_for_each_channel_continue(i, 0);
362 : }
363 :
364 : static int bdev_daos_reset_retry_timer(void *arg);
365 :
366 : static void
367 0 : _bdev_daos_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
368 : {
369 0 : struct bdev_daos *daos = spdk_io_channel_iter_get_ctx(i);
370 :
371 0 : if (status == -1) {
372 0 : daos->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_daos_reset_retry_timer, daos, 1000);
373 0 : return;
374 : }
375 :
376 0 : spdk_bdev_io_complete(spdk_bdev_io_from_ctx(daos->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
377 : }
378 :
379 : static int
380 0 : bdev_daos_reset_retry_timer(void *arg)
381 : {
382 0 : struct bdev_daos *daos = arg;
383 :
384 0 : if (daos->reset_retry_timer) {
385 0 : spdk_poller_unregister(&daos->reset_retry_timer);
386 : }
387 :
388 0 : spdk_for_each_channel(daos,
389 : _bdev_daos_get_io_inflight,
390 : daos,
391 : _bdev_daos_get_io_inflight_done);
392 :
393 0 : return SPDK_POLLER_BUSY;
394 : }
395 :
396 : static void
397 0 : bdev_daos_reset(struct bdev_daos *daos, struct bdev_daos_task *task)
398 : {
399 0 : assert(daos != NULL);
400 0 : assert(task != NULL);
401 :
402 0 : daos->reset_task = task;
403 0 : bdev_daos_reset_retry_timer(daos);
404 0 : }
405 :
406 :
407 : static int64_t
408 0 : bdev_daos_unmap(struct bdev_daos_io_channel *ch, uint64_t nbytes,
409 : uint64_t offset)
410 : {
411 0 : int rc = 0;
412 :
413 0 : SPDK_DEBUGLOG(bdev_daos, "unmap at %#lx with size %#lx\n", offset, nbytes);
414 0 : if ((rc = dfs_punch(ch->dfs, ch->obj, offset, nbytes))) {
415 0 : return -rc;
416 : }
417 0 : return 0;
418 : }
419 :
420 : static void
421 0 : _bdev_daos_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
422 : {
423 0 : struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch);
424 :
425 : int64_t rc;
426 0 : switch (bdev_io->type) {
427 0 : case SPDK_BDEV_IO_TYPE_READ:
428 0 : spdk_bdev_io_get_buf(bdev_io, bdev_daos_get_buf_cb,
429 0 : bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
430 0 : break;
431 :
432 0 : case SPDK_BDEV_IO_TYPE_WRITE:
433 0 : rc = bdev_daos_writev((struct bdev_daos *)bdev_io->bdev->ctxt,
434 : dch,
435 0 : (struct bdev_daos_task *)bdev_io->driver_ctx,
436 : bdev_io->u.bdev.iovs,
437 : bdev_io->u.bdev.iovcnt,
438 0 : bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
439 0 : bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
440 0 : if (rc < 0) {
441 0 : spdk_bdev_io_complete_aio_status(bdev_io, rc);
442 0 : return;
443 : }
444 0 : break;
445 :
446 0 : case SPDK_BDEV_IO_TYPE_RESET:
447 : /* Can't cancel in-flight requests, but can wait for their completions */
448 0 : bdev_daos_reset((struct bdev_daos *)bdev_io->bdev->ctxt,
449 0 : (struct bdev_daos_task *)bdev_io->driver_ctx);
450 0 : break;
451 :
452 0 : case SPDK_BDEV_IO_TYPE_FLUSH:
453 : /* NOOP because DAOS requests land on PMEM and writes are persistent upon completion */
454 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
455 0 : break;
456 :
457 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
458 0 : rc = bdev_daos_unmap(dch,
459 0 : bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
460 0 : bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
461 0 : if (!rc) {
462 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
463 : } else {
464 0 : SPDK_DEBUGLOG(bdev_daos, "%s: could not unmap: %s",
465 : dch->disk->disk.name, strerror(-rc));
466 0 : spdk_bdev_io_complete_aio_status(bdev_io, rc);
467 : }
468 :
469 0 : break;
470 :
471 0 : default:
472 0 : SPDK_ERRLOG("Wrong io type\n");
473 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
474 0 : break;
475 : }
476 : }
477 :
478 : static void
479 0 : bdev_daos_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
480 : {
481 0 : struct bdev_daos_task *task = (struct bdev_daos_task *)bdev_io->driver_ctx;
482 0 : struct spdk_thread *submit_td = spdk_io_channel_get_thread(ch);
483 :
484 0 : assert(task != NULL);
485 :
486 0 : task->submit_td = submit_td;
487 0 : task->bdev_io = bdev_io;
488 :
489 0 : _bdev_daos_submit_request(ch, bdev_io);
490 0 : }
491 :
492 : #define POLLING_EVENTS_NUM 64
493 :
494 : static int
495 0 : bdev_daos_channel_poll(void *arg)
496 : {
497 0 : daos_event_t *evp[POLLING_EVENTS_NUM];
498 0 : struct bdev_daos_io_channel *ch = arg;
499 :
500 0 : assert(ch != NULL);
501 0 : assert(ch->disk != NULL);
502 :
503 0 : int rc = daos_eq_poll(ch->queue, 0, DAOS_EQ_NOWAIT,
504 : POLLING_EVENTS_NUM, evp);
505 :
506 0 : if (rc < 0) {
507 0 : SPDK_DEBUGLOG(bdev_daos, "%s: could not poll daos event queue: " DF_RC "\n",
508 : ch->disk->disk.name, DP_RC(rc));
509 : /*
510 : * TODO: There are cases when this is self healing, e.g.
511 : * brief network issues, DAOS agent restarting etc.
512 : * However, if the issue persists over some time better would be
513 : * to remove a bdev or the whole controller
514 : */
515 0 : return SPDK_POLLER_BUSY;
516 : }
517 :
518 0 : for (int i = 0; i < rc; ++i) {
519 0 : int status = 0;
520 0 : struct bdev_daos_task *task = SPDK_CONTAINEROF(evp[i], struct bdev_daos_task, ev);
521 :
522 0 : assert(task != NULL);
523 :
524 0 : if (task->ev.ev_error != DER_SUCCESS) {
525 0 : status = -task->ev.ev_error;
526 : }
527 :
528 0 : daos_event_fini(&task->ev);
529 0 : bdev_daos_io_complete(task->bdev_io, status);
530 : }
531 :
532 0 : return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
533 : }
534 :
535 : static bool
536 0 : bdev_daos_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
537 : {
538 0 : switch (io_type) {
539 0 : case SPDK_BDEV_IO_TYPE_READ:
540 : case SPDK_BDEV_IO_TYPE_WRITE:
541 : case SPDK_BDEV_IO_TYPE_RESET:
542 : case SPDK_BDEV_IO_TYPE_FLUSH:
543 : case SPDK_BDEV_IO_TYPE_UNMAP:
544 0 : return true;
545 :
546 0 : default:
547 0 : return false;
548 : }
549 : }
550 :
/* fn_table hook: obtain an io channel for the io_device passed as ctx. */
static struct spdk_io_channel *
bdev_daos_get_io_channel(void *ctx)
{
	struct spdk_io_channel *channel = spdk_get_io_channel(ctx);

	return channel;
}
556 :
557 : static void
558 0 : bdev_daos_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
559 : {
560 0 : struct bdev_daos *daos = bdev->ctxt;
561 :
562 0 : spdk_json_write_object_begin(w);
563 :
564 0 : spdk_json_write_named_string(w, "method", "bdev_daos_create");
565 :
566 0 : spdk_json_write_named_object_begin(w, "params");
567 0 : spdk_json_write_named_string(w, "name", bdev->name);
568 0 : spdk_json_write_named_string(w, "pool", daos->pool_name);
569 0 : spdk_json_write_named_string(w, "cont", daos->cont_name);
570 0 : spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
571 0 : spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
572 0 : spdk_json_write_named_uuid(w, "uuid", &bdev->uuid);
573 :
574 0 : spdk_json_write_object_end(w);
575 :
576 0 : spdk_json_write_object_end(w);
577 0 : }
578 :
579 : static const struct spdk_bdev_fn_table daos_fn_table = {
580 : .destruct = bdev_daos_destruct,
581 : .submit_request = bdev_daos_submit_request,
582 : .io_type_supported = bdev_daos_io_type_supported,
583 : .get_io_channel = bdev_daos_get_io_channel,
584 : .write_config_json = bdev_daos_write_json_config,
585 : };
586 :
587 : static int
588 0 : bdev_daos_io_channel_setup_daos(struct bdev_daos_io_channel *ch)
589 : {
590 0 : int rc = 0;
591 0 : struct bdev_daos *daos = ch->disk;
592 0 : daos_pool_info_t pinfo;
593 0 : daos_cont_info_t cinfo;
594 :
595 0 : int fd_oflag = O_CREAT | O_RDWR;
596 0 : mode_t mode = S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO;
597 :
598 0 : rc = bdev_daos_get_engine();
599 0 : if (rc) {
600 0 : SPDK_ERRLOG("could not initialize DAOS engine: " DF_RC "\n", DP_RC(rc));
601 0 : return -daos2posix_errno(rc);
602 : }
603 :
604 0 : SPDK_DEBUGLOG(bdev_daos, "connecting to daos pool '%s'\n", daos->pool_name);
605 0 : if ((rc = daos_pool_connect(daos->pool_name, NULL, DAOS_PC_RW, &ch->pool, &pinfo, NULL))) {
606 0 : SPDK_ERRLOG("%s: could not connect to daos pool: " DF_RC "\n",
607 : daos->disk.name, DP_RC(rc));
608 0 : return -daos2posix_errno(rc);
609 : }
610 0 : SPDK_DEBUGLOG(bdev_daos, "connecting to daos container '%s'\n", daos->cont_name);
611 0 : if ((rc = daos_cont_open(ch->pool, daos->cont_name, DAOS_COO_RW, &ch->cont, &cinfo, NULL))) {
612 0 : SPDK_ERRLOG("%s: could not open daos container: " DF_RC "\n",
613 : daos->disk.name, DP_RC(rc));
614 0 : rc = daos2posix_errno(rc);
615 0 : goto cleanup_pool;
616 : }
617 0 : SPDK_DEBUGLOG(bdev_daos, "mounting daos dfs\n");
618 0 : if ((rc = dfs_mount(ch->pool, ch->cont, O_RDWR, &ch->dfs))) {
619 0 : SPDK_ERRLOG("%s: could not mount daos dfs: %s\n", daos->disk.name, strerror(rc));
620 0 : goto cleanup_cont;
621 : }
622 0 : SPDK_DEBUGLOG(bdev_daos, "opening dfs object\n");
623 0 : if ((rc = dfs_open(ch->dfs, NULL, daos->disk.name, mode, fd_oflag, daos->oclass,
624 : 0, NULL, &ch->obj))) {
625 0 : SPDK_ERRLOG("%s: could not open dfs object: %s\n", daos->disk.name, strerror(rc));
626 0 : goto cleanup_mount;
627 : }
628 0 : if ((rc = daos_eq_create(&ch->queue))) {
629 0 : SPDK_ERRLOG("%s: could not create daos event queue: " DF_RC "\n",
630 : daos->disk.name, DP_RC(rc));
631 0 : rc = daos2posix_errno(rc);
632 0 : goto cleanup_obj;
633 : }
634 :
635 0 : return 0;
636 :
637 0 : cleanup_obj:
638 0 : dfs_release(ch->obj);
639 0 : cleanup_mount:
640 0 : dfs_umount(ch->dfs);
641 0 : cleanup_cont:
642 0 : daos_cont_close(ch->cont, NULL);
643 0 : cleanup_pool:
644 0 : daos_pool_disconnect(ch->pool, NULL);
645 :
646 0 : return -rc;
647 : }
648 :
649 : static int
650 0 : bdev_daos_io_channel_create_cb(void *io_device, void *ctx_buf)
651 : {
652 : int rc;
653 0 : struct bdev_daos_io_channel *ch = ctx_buf;
654 :
655 0 : ch->disk = io_device;
656 :
657 0 : if ((rc = bdev_daos_io_channel_setup_daos(ch))) {
658 0 : return rc;
659 : }
660 :
661 0 : SPDK_DEBUGLOG(bdev_daos, "%s: starting daos event queue poller\n",
662 : ch->disk->disk.name);
663 :
664 0 : ch->poller = SPDK_POLLER_REGISTER(bdev_daos_channel_poll, ch, 0);
665 :
666 0 : return 0;
667 : }
668 :
669 : static void
670 0 : bdev_daos_io_channel_destroy_cb(void *io_device, void *ctx_buf)
671 : {
672 : int rc;
673 0 : struct bdev_daos_io_channel *ch = ctx_buf;
674 :
675 0 : SPDK_DEBUGLOG(bdev_daos, "stopping daos event queue poller\n");
676 :
677 0 : spdk_poller_unregister(&ch->poller);
678 :
679 0 : if ((rc = daos_eq_destroy(ch->queue, DAOS_EQ_DESTROY_FORCE))) {
680 0 : SPDK_ERRLOG("could not destroy daos event queue: " DF_RC "\n", DP_RC(rc));
681 : }
682 0 : if ((rc = dfs_release(ch->obj))) {
683 0 : SPDK_ERRLOG("could not release dfs object: %s\n", strerror(rc));
684 : }
685 0 : if ((rc = dfs_umount(ch->dfs))) {
686 0 : SPDK_ERRLOG("could not unmount dfs: %s\n", strerror(rc));
687 : }
688 0 : if ((rc = daos_cont_close(ch->cont, NULL))) {
689 0 : SPDK_ERRLOG("could not close container: " DF_RC "\n", DP_RC(rc));
690 : }
691 0 : if ((rc = daos_pool_disconnect(ch->pool, NULL))) {
692 0 : SPDK_ERRLOG("could not disconnect from pool: " DF_RC "\n", DP_RC(rc));
693 : }
694 0 : rc = bdev_daos_put_engine();
695 0 : if (rc) {
696 0 : SPDK_ERRLOG("could not de-initialize DAOS engine: " DF_RC "\n", DP_RC(rc));
697 : }
698 0 : }
699 :
700 : int
701 0 : create_bdev_daos(struct spdk_bdev **bdev,
702 : const char *name, const struct spdk_uuid *uuid,
703 : const char *pool, const char *cont, const char *oclass,
704 : uint64_t num_blocks, uint32_t block_size)
705 : {
706 : int rc;
707 : size_t len;
708 : struct bdev_daos *daos;
709 0 : struct bdev_daos_io_channel ch = {};
710 :
711 0 : SPDK_NOTICELOG("%s: creating bdev_daos disk on '%s:%s'\n", name, pool, cont);
712 :
713 0 : if (num_blocks == 0) {
714 0 : SPDK_ERRLOG("Disk num_blocks must be greater than 0");
715 0 : return -EINVAL;
716 : }
717 :
718 0 : if (block_size % 512) {
719 0 : SPDK_ERRLOG("block size must be 512 bytes aligned\n");
720 0 : return -EINVAL;
721 : }
722 :
723 0 : if (!name) {
724 0 : SPDK_ERRLOG("device name cannot be empty\n");
725 0 : return -EINVAL;
726 : }
727 :
728 0 : if (!pool) {
729 0 : SPDK_ERRLOG("daos pool cannot be empty\n");
730 0 : return -EINVAL;
731 : }
732 0 : if (!cont) {
733 0 : SPDK_ERRLOG("daos cont cannot be empty\n");
734 0 : return -EINVAL;
735 : }
736 :
737 0 : daos = calloc(1, sizeof(*daos));
738 0 : if (!daos) {
739 0 : SPDK_ERRLOG("calloc() failed\n");
740 0 : return -ENOMEM;
741 : }
742 :
743 0 : if (!oclass) {
744 0 : oclass = "SX"; /* Max throughput by default */
745 : }
746 0 : daos->oclass = daos_oclass_name2id(oclass);
747 0 : if (daos->oclass == OC_UNKNOWN) {
748 0 : SPDK_ERRLOG("could not parse daos oclass: '%s'\n", oclass);
749 0 : free(daos);
750 0 : return -EINVAL;
751 : }
752 :
753 0 : len = strlen(pool);
754 0 : if (len > DAOS_PROP_LABEL_MAX_LEN) {
755 0 : SPDK_ERRLOG("daos pool name is too long\n");
756 0 : free(daos);
757 0 : return -EINVAL;
758 : }
759 0 : memcpy(daos->pool_name, pool, len);
760 :
761 0 : len = strlen(cont);
762 0 : if (len > DAOS_PROP_LABEL_MAX_LEN) {
763 0 : SPDK_ERRLOG("daos cont name is too long\n");
764 0 : free(daos);
765 0 : return -EINVAL;
766 : }
767 0 : memcpy(daos->cont_name, cont, len);
768 :
769 0 : daos->disk.name = strdup(name);
770 0 : daos->disk.product_name = "DAOS bdev";
771 :
772 0 : daos->disk.write_cache = 0;
773 0 : daos->disk.blocklen = block_size;
774 0 : daos->disk.blockcnt = num_blocks;
775 0 : daos->disk.uuid = *uuid;
776 0 : daos->disk.max_num_segments = SPDK_BDEV_IO_NUM_CHILD_IOV;
777 :
778 0 : daos->disk.ctxt = daos;
779 0 : daos->disk.fn_table = &daos_fn_table;
780 0 : daos->disk.module = &daos_if;
781 :
782 0 : rc = bdev_daos_get_engine();
783 0 : if (rc) {
784 0 : SPDK_ERRLOG("could not initialize DAOS engine: " DF_RC "\n", DP_RC(rc));
785 0 : bdev_daos_free(daos);
786 0 : return -daos2posix_errno(rc);
787 : }
788 :
789 : /* We try to connect to the DAOS container during channel creation, so simulate
790 : * creating a channel here, so that we can return a failure when the DAOS bdev
791 : * is created, instead of finding it out later when the first channel is created
792 : * and leaving unusable bdev registered.
793 : */
794 0 : rc = bdev_daos_io_channel_create_cb(daos, &ch);
795 0 : if (rc) {
796 0 : SPDK_ERRLOG("'%s' could not initialize io-channel: %s\n", name, strerror(-rc));
797 0 : bdev_daos_free(daos);
798 0 : return rc;
799 : }
800 0 : bdev_daos_io_channel_destroy_cb(daos, &ch);
801 :
802 0 : spdk_io_device_register(daos, bdev_daos_io_channel_create_cb,
803 : bdev_daos_io_channel_destroy_cb,
804 : sizeof(struct bdev_daos_io_channel),
805 0 : daos->disk.name);
806 :
807 :
808 0 : rc = spdk_bdev_register(&daos->disk);
809 0 : if (rc) {
810 0 : spdk_io_device_unregister(daos, NULL);
811 0 : bdev_daos_free(daos);
812 0 : return rc;
813 : }
814 :
815 0 : *bdev = &(daos->disk);
816 :
817 0 : return rc;
818 : }
819 :
820 : static void
821 0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
822 : {
823 0 : }
824 :
825 : int
826 0 : bdev_daos_resize(const char *name, const uint64_t new_size_in_mb)
827 : {
828 0 : int rc = 0;
829 0 : struct spdk_bdev_desc *desc;
830 : struct spdk_bdev *bdev;
831 : struct spdk_io_channel *ch;
832 : struct bdev_daos_io_channel *dch;
833 : uint64_t new_size_in_byte;
834 : uint64_t current_size_in_mb;
835 :
836 0 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
837 0 : if (rc != 0) {
838 0 : return rc;
839 : }
840 :
841 0 : bdev = spdk_bdev_desc_get_bdev(desc);
842 0 : if (bdev->module != &daos_if) {
843 0 : rc = -EINVAL;
844 0 : goto exit;
845 : }
846 :
847 0 : current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
848 0 : if (current_size_in_mb > new_size_in_mb) {
849 0 : SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n");
850 0 : rc = -EINVAL;
851 0 : goto exit;
852 : }
853 :
854 0 : ch = bdev_daos_get_io_channel(bdev);
855 0 : dch = spdk_io_channel_get_ctx(ch);
856 0 : new_size_in_byte = new_size_in_mb * 1024 * 1024;
857 :
858 0 : rc = dfs_punch(dch->dfs, dch->obj, new_size_in_byte, DFS_MAX_FSIZE);
859 0 : spdk_put_io_channel(ch);
860 0 : if (rc != 0) {
861 0 : SPDK_ERRLOG("failed to resize daos bdev: %s", strerror(rc));
862 0 : rc = -rc;
863 0 : goto exit;
864 : }
865 :
866 0 : SPDK_NOTICELOG("DAOS bdev device is resized: bdev name %s, old block count %" PRIu64
867 : ", new block count %"
868 : PRIu64 "\n",
869 : bdev->name,
870 : bdev->blockcnt,
871 : new_size_in_byte / bdev->blocklen);
872 0 : rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
873 0 : if (rc != 0) {
874 0 : SPDK_ERRLOG("failed to notify block cnt change.\n");
875 : }
876 :
877 0 : exit:
878 0 : spdk_bdev_close(desc);
879 0 : return rc;
880 : }
881 :
882 : void
883 0 : delete_bdev_daos(const char *bdev_name, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
884 : {
885 : int rc;
886 :
887 0 : rc = spdk_bdev_unregister_by_name(bdev_name, &daos_if, cb_fn, cb_arg);
888 0 : if (rc != 0) {
889 0 : cb_fn(cb_arg, rc);
890 : }
891 0 : }
892 :
893 : static int
894 0 : bdev_daos_get_engine(void)
895 : {
896 0 : int rc = 0;
897 :
898 0 : pthread_mutex_lock(&g_bdev_daos_init_mutex);
899 0 : if (g_bdev_daos_init_count++ > 0) {
900 0 : pthread_mutex_unlock(&g_bdev_daos_init_mutex);
901 0 : return 0;
902 : }
903 0 : SPDK_DEBUGLOG(bdev_daos, "initializing DAOS engine\n");
904 :
905 0 : rc = daos_init();
906 0 : pthread_mutex_unlock(&g_bdev_daos_init_mutex);
907 :
908 0 : if (rc != -DER_ALREADY && rc) {
909 0 : return rc;
910 : }
911 0 : return 0;
912 : }
913 :
914 : static int
915 0 : bdev_daos_put_engine(void)
916 : {
917 0 : int rc = 0;
918 :
919 0 : pthread_mutex_lock(&g_bdev_daos_init_mutex);
920 0 : if (--g_bdev_daos_init_count > 0) {
921 0 : pthread_mutex_unlock(&g_bdev_daos_init_mutex);
922 0 : return 0;
923 : }
924 0 : SPDK_DEBUGLOG(bdev_daos, "de-initializing DAOS engine\n");
925 :
926 0 : rc = daos_fini();
927 0 : pthread_mutex_unlock(&g_bdev_daos_init_mutex);
928 :
929 0 : return rc;
930 : }
931 :
932 : static int
933 0 : bdev_daos_initialize(void)
934 : {
935 : /* DAOS engine and client initialization happens
936 : during the first bdev creation */
937 0 : return 0;
938 : }
939 :
940 0 : SPDK_LOG_REGISTER_COMPONENT(bdev_daos)
|