Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 : */
4 : #include "spdk/stdinc.h"
5 : #include "spdk/event.h"
6 : #include "spdk/log.h"
7 : #include "spdk/string.h"
8 : #include "spdk/config.h"
9 : #include "spdk/util.h"
10 : #include "spdk/thread.h"
11 : #include "aio_mgr.h"
12 : #include "fsdev_aio.h"
13 :
/* NOTE(review): presumably a sentinel status for requests that complete asynchronously;
 * it is not referenced in the visible part of this file - confirm against the callers.
 */
14 : #define IO_STATUS_ASYNC INT_MIN
15 :
/* Marks a variable as intentionally unused (evaluates it and discards the value). */
16 : #ifndef UNUSED
17 : #define UNUSED(x) (void)(x)
18 : #endif
19 :
/* Defaults and limits of the aio fsdev. */
20 : /* See https://libfuse.github.io/doxygen/structfuse__conn__info.html */
21 : #define MAX_BACKGROUND (100)
22 : #define TIME_GRAN (1)
23 : #define MAX_AIOS 256
24 : #define DEFAULT_WRITEBACK_CACHE true
25 : #define DEFAULT_MAX_WRITE 0x00020000
26 : #define DEFAULT_XATTR_ENABLED false
27 : #define DEFAULT_SKIP_RW false
/* Reported as spdk_fsdev_file_attr::valid_ms by file_object_fill_attr(). */
28 : #define DEFAULT_TIMEOUT_MS 0 /* to prevent the attribute caching */
29 :
/* Accessors for the nanosecond part of the struct stat timestamps.
 * The member names differ between platforms, so the variant is selected by the
 * SPDK_CONFIG_HAVE_STRUCT_STAT_* configuration macros.
 */
30 : #ifdef SPDK_CONFIG_HAVE_STRUCT_STAT_ST_ATIM
31 : /* Linux */
32 : #define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
33 : #define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
34 : #define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
35 : #define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val)
36 : #define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val)
37 : #define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val)
38 : #elif defined(SPDK_CONFIG_HAVE_STRUCT_STAT_ST_ATIMESPEC)
39 : /* FreeBSD */
40 : #define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
41 : #define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
42 : #define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
43 : #define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val)
44 : #define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val)
45 : #define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val)
/* Neither member is available: the getters yield 0 and the setters do nothing. */
46 : #else
47 : #define ST_ATIM_NSEC(stbuf) 0
48 : #define ST_CTIM_NSEC(stbuf) 0
49 : #define ST_MTIM_NSEC(stbuf) 0
50 : #define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0)
51 : #define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0)
52 : #define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0)
53 : #endif
54 :
/* Effective user/group id pair. lo_change_cred() stores the current pair in one instance
 * and applies another; lo_restore_cred() re-applies a saved pair.
 */
55 : struct lo_cred {
56 : uid_t euid; /* effective user id */
57 : gid_t egid; /* effective group id */
58 : };
59 :
60 : /** Inode number type (unsigned 64-bit). NOTE(review): not referenced in the visible part of this file; struct lo_key uses ino_t. */
61 : typedef uint64_t spdk_ino_t;
62 :
/* Identity of an underlying file: lo_find_leaf_unsafe() treats two file objects as the
 * same file when both the inode number and the device id match.
 */
63 : struct lo_key {
64 : ino_t ino; /* inode number (st_ino) */
65 : dev_t dev; /* device id (st_dev) */
66 : };
67 :
/* An open file or directory. Created by file_handle_create() and destroyed by
 * file_handle_delete(); each handle holds one reference on its file object.
 */
68 : struct spdk_fsdev_file_handle {
69 : int fd; /* descriptor of the opened file/directory */
70 : struct {
71 : DIR *dp; /* directory stream set by lo_opendir(); NULL for handles created by lo_open() */
72 : struct dirent *entry; /* entry read by lo_readdir() but not consumed yet, or NULL */
73 : off_t offset; /* directory offset lo_readdir() expects the next request to carry */
74 : } dir;
75 : struct spdk_fsdev_file_object *fobject; /* file object this handle was opened on */
76 : TAILQ_ENTRY(spdk_fsdev_file_handle) link; /* entry in fobject->handles */
77 : };
78 :
/* printf-style format fragment and matching argument list used to log a file object's key. */
79 : #define FOBJECT_FMT "ino=%" PRIu64 " dev=%" PRIu64
80 : #define FOBJECT_ARGS(fo) ((uint64_t)(fo)->key.ino), ((uint64_t)(fo)->key.dev)
/* A reference-counted node describing one underlying file. Objects form a tree: every
 * object (except one created without a parent) is linked into its parent's `leafs` list
 * and holds one reference on that parent.
 */
81 : struct spdk_fsdev_file_object {
82 : uint32_t is_symlink : 1; /* set from S_ISLNK(mode) at creation */
83 : uint32_t is_dir : 1; /* set from S_ISDIR(mode) at creation */
84 : uint32_t reserved : 30;
85 : int fd; /* descriptor of the file; closed when the object is destroyed */
86 : char *fd_str; /* decimal string form of fd, used as a path relative to proc_self_fd */
87 : struct lo_key key; /* inode/device identity */
88 : uint64_t refcount; /* the object is destroyed by file_object_unref() when this reaches 0 */
89 : struct spdk_fsdev_file_object *parent_fobject; /* parent object or NULL */
90 : TAILQ_ENTRY(spdk_fsdev_file_object) link; /* entry in parent_fobject->leafs */
91 : TAILQ_HEAD(, spdk_fsdev_file_object) leafs; /* child objects */
92 : TAILQ_HEAD(, spdk_fsdev_file_handle) handles; /* handles opened on this object */
93 : struct spdk_spinlock lock; /* taken around refcount/leafs/handles updates */
94 : char name[]; /* NOTE(review): no storage is allocated for it by file_object_create_unsafe() */
95 : };
96 :
/* Per-filesystem state of the aio fsdev module. */
97 : struct aio_fsdev {
98 : struct spdk_fsdev fsdev; /* embedded generic fsdev; see fsdev_to_aio_fsdev() */
99 : struct spdk_fsdev_mount_opts mount_opts; /* options consulted/updated by lo_set_mount_opts() */
100 : char *root_path; /* NOTE(review): presumably the exported directory path - not used in the visible code */
101 : int proc_self_fd; /* directory fd against which fd_str names are resolved; presumably /proc/self/fd - confirm */
102 : pthread_mutex_t mutex; /* NOTE(review): not used in the visible part of this file */
103 : struct spdk_fsdev_file_object *root; /* root file object handed out by lo_mount() */
104 : TAILQ_ENTRY(aio_fsdev) tailq; /* list linkage; presumably for g_aio_fsdev_head - confirm */
105 : bool xattr_enabled; /* NOTE(review): not used in the visible part of this file */
106 : bool skip_rw; /* NOTE(review): not used in the visible part of this file */
107 : };
108 :
/* Driver-private per-IO context; it lives in spdk_fsdev_io::driver_ctx
 * (see aio_to_fsdev_io() and fsdev_to_aio_io()).
 */
109 : struct aio_fsdev_io {
110 : struct spdk_aio_mgr_io *aio; /* NOTE(review): presumably the in-flight aio manager request - confirm */
111 : struct aio_io_channel *ch; /* NOTE(review): presumably the channel the IO was submitted on - confirm */
112 : TAILQ_ENTRY(aio_fsdev_io) link; /* list linkage; the channel keeps two lists of this type */
113 : };
114 :
/* Per-channel state. NOTE(review): the code using these fields is outside the visible part
 * of the file; the field notes below are inferred from the names - confirm.
 */
115 : struct aio_io_channel {
116 : struct spdk_poller *poller;
117 : struct spdk_aio_mgr *mgr;
118 : TAILQ_HEAD(, aio_fsdev_io) ios_in_progress; /* presumably IOs submitted and not finished yet */
119 : TAILQ_HEAD(, aio_fsdev_io) ios_to_complete; /* presumably IOs finished and awaiting completion */
120 : };
121 :
/* File-scope list of aio_fsdev instances (initially empty).
 * NOTE(review): presumably linked through aio_fsdev::tailq - confirm.
 */
122 : static TAILQ_HEAD(, aio_fsdev) g_aio_fsdev_head = TAILQ_HEAD_INITIALIZER(
123 : g_aio_fsdev_head);
124 :
125 : static inline struct aio_fsdev *
126 0 : fsdev_to_aio_fsdev(struct spdk_fsdev *fsdev)
127 : {
128 0 : return SPDK_CONTAINEROF(fsdev, struct aio_fsdev, fsdev);
129 : }
130 :
131 : static inline struct spdk_fsdev_io *
132 0 : aio_to_fsdev_io(const struct aio_fsdev_io *aio_io)
133 : {
134 0 : return SPDK_CONTAINEROF(aio_io, struct spdk_fsdev_io, driver_ctx);
135 : }
136 :
137 : static inline struct aio_fsdev_io *
138 0 : fsdev_to_aio_io(const struct spdk_fsdev_io *fsdev_io)
139 : {
140 0 : return (struct aio_fsdev_io *)fsdev_io->driver_ctx;
141 : }
142 :
/* A file object is considered valid when it is non-NULL; vfsdev is not consulted. */
static inline bool
fsdev_aio_is_valid_fobject(struct aio_fsdev *vfsdev, struct spdk_fsdev_file_object *fobject)
{
	if (fobject == NULL) {
		return false;
	}

	return true;
}
148 :
/* A file handle is considered valid when it is non-NULL; vfsdev is not consulted. */
static inline bool
fsdev_aio_is_valid_fhandle(struct aio_fsdev *vfsdev, struct spdk_fsdev_file_handle *fhandle)
{
	if (fhandle == NULL) {
		return false;
	}

	return true;
}
154 :
/* Return 1 when `name` is exactly "." or "..", 0 otherwise. */
static int
is_dot_or_dotdot(const char *name)
{
	if (name[0] != '.') {
		return 0;
	}

	if (name[1] == '\0') {
		return 1; /* "." */
	}

	return name[1] == '.' && name[2] == '\0'; /* ".." */
}
161 :
/* Return non-zero when `path` is a single path component (contains no '/')
 * that is neither "." nor "..".
 */
static int
is_safe_path_component(const char *path)
{
	const int has_slash = (strchr(path, '/') != NULL);

	return has_slash ? 0 : !is_dot_or_dotdot(path);
}
172 :
173 : static struct spdk_fsdev_file_object *
174 0 : lo_find_leaf_unsafe(struct spdk_fsdev_file_object *fobject, ino_t ino, dev_t dev)
175 : {
176 : struct spdk_fsdev_file_object *leaf_fobject;
177 :
178 0 : TAILQ_FOREACH(leaf_fobject, &fobject->leafs, link) {
179 0 : if (leaf_fobject->key.ino == ino && leaf_fobject->key.dev == dev) {
180 0 : return leaf_fobject;
181 : }
182 : }
183 :
184 0 : return NULL;
185 : }
186 :
187 : /* This function returns:
188 : * 1 if the refcount is still non zero
189 : * a negative error number if the refcount became zero, the file object was deleted but the defered underlying file deletion failed
190 : * 0 if the refcount became zero, the file object was deleted and eithr the underlying file deletion wasn't defered or succeeded
191 : */
192 : static int
193 0 : file_object_unref(struct spdk_fsdev_file_object *fobject, uint32_t count)
194 : {
195 0 : int res = 0;
196 :
197 0 : spdk_spin_lock(&fobject->lock);
198 0 : assert(fobject->refcount >= count);
199 0 : fobject->refcount -= count;
200 0 : spdk_spin_unlock(&fobject->lock);
201 :
202 0 : if (!fobject->refcount) {
203 0 : struct spdk_fsdev_file_object *parent_fobject = fobject->parent_fobject;
204 :
205 0 : if (parent_fobject) {
206 0 : spdk_spin_lock(&parent_fobject->lock);
207 0 : TAILQ_REMOVE(&parent_fobject->leafs, fobject, link);
208 0 : spdk_spin_unlock(&parent_fobject->lock);
209 0 : file_object_unref(parent_fobject, 1); /* unref by the leaf */
210 : }
211 :
212 0 : spdk_spin_destroy(&fobject->lock);
213 0 : close(fobject->fd);
214 0 : free(fobject->fd_str);
215 0 : free(fobject);
216 : }
217 :
218 0 : return res;
219 : }
220 :
221 : static void
222 0 : file_object_ref(struct spdk_fsdev_file_object *fobject)
223 : {
224 0 : spdk_spin_lock(&fobject->lock);
225 0 : fobject->refcount++;
226 0 : spdk_spin_unlock(&fobject->lock);
227 0 : }
228 :
229 : static struct spdk_fsdev_file_object *
230 0 : file_object_create_unsafe(struct spdk_fsdev_file_object *parent_fobject, int fd, ino_t ino,
231 : dev_t dev, mode_t mode)
232 : {
233 : struct spdk_fsdev_file_object *fobject;
234 :
235 0 : fobject = calloc(1, sizeof(*fobject));
236 0 : if (!fobject) {
237 0 : SPDK_ERRLOG("Cannot alloc fobject\n");
238 0 : return NULL;
239 : }
240 :
241 0 : fobject->fd_str = spdk_sprintf_alloc("%d", fd);
242 0 : if (!fobject->fd_str) {
243 0 : SPDK_ERRLOG("Cannot alloc fd_str\n");
244 0 : free(fobject);
245 0 : return NULL;
246 : }
247 :
248 0 : fobject->fd = fd;
249 0 : fobject->key.ino = ino;
250 0 : fobject->key.dev = dev;
251 0 : fobject->refcount = 1;
252 0 : fobject->is_symlink = S_ISLNK(mode) ? 1 : 0;
253 0 : fobject->is_dir = S_ISDIR(mode) ? 1 : 0;
254 :
255 0 : TAILQ_INIT(&fobject->handles);
256 0 : TAILQ_INIT(&fobject->leafs);
257 0 : spdk_spin_init(&fobject->lock);
258 :
259 0 : if (parent_fobject) {
260 0 : fobject->parent_fobject = parent_fobject;
261 0 : TAILQ_INSERT_TAIL(&parent_fobject->leafs, fobject, link);
262 0 : parent_fobject->refcount++;
263 : }
264 :
265 0 : return fobject;
266 : }
267 :
268 : static struct spdk_fsdev_file_handle *
269 0 : file_handle_create(struct spdk_fsdev_file_object *fobject, int fd)
270 : {
271 : struct spdk_fsdev_file_handle *fhandle;
272 :
273 0 : fhandle = calloc(1, sizeof(*fhandle));
274 0 : if (!fhandle) {
275 0 : SPDK_ERRLOG("Cannot alloc fhandle\n");
276 0 : return NULL;
277 : }
278 :
279 0 : fhandle->fobject = fobject;
280 0 : fhandle->fd = fd;
281 :
282 0 : spdk_spin_lock(&fobject->lock);
283 0 : fobject->refcount++;
284 0 : TAILQ_INSERT_TAIL(&fobject->handles, fhandle, link);
285 0 : spdk_spin_unlock(&fobject->lock);
286 :
287 0 : return fhandle;
288 : }
289 :
290 : static void
291 0 : file_handle_delete(struct spdk_fsdev_file_handle *fhandle)
292 : {
293 0 : struct spdk_fsdev_file_object *fobject = fhandle->fobject;
294 :
295 0 : spdk_spin_lock(&fobject->lock);
296 0 : fobject->refcount--;
297 0 : TAILQ_REMOVE(&fobject->handles, fhandle, link);
298 0 : spdk_spin_unlock(&fobject->lock);
299 :
300 0 : if (fhandle->dir.dp) {
301 0 : closedir(fhandle->dir.dp);
302 : }
303 :
304 0 : close(fhandle->fd);
305 0 : free(fhandle);
306 0 : }
307 :
308 : static int
309 0 : file_object_fill_attr(struct spdk_fsdev_file_object *fobject, struct spdk_fsdev_file_attr *attr)
310 : {
311 0 : struct stat stbuf;
312 : int res;
313 :
314 0 : res = fstatat(fobject->fd, "", &stbuf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
315 0 : if (res == -1) {
316 0 : res = -errno;
317 0 : SPDK_ERRLOG("fstatat() failed with %d\n", res);
318 0 : return res;
319 : }
320 :
321 0 : memset(attr, 0, sizeof(*attr));
322 :
323 0 : attr->ino = stbuf.st_ino;
324 0 : attr->size = stbuf.st_size;
325 0 : attr->blocks = stbuf.st_blocks;
326 0 : attr->atime = stbuf.st_atime;
327 0 : attr->mtime = stbuf.st_mtime;
328 0 : attr->ctime = stbuf.st_ctime;
329 0 : attr->atimensec = ST_ATIM_NSEC(&stbuf);
330 0 : attr->mtimensec = ST_MTIM_NSEC(&stbuf);
331 0 : attr->ctimensec = ST_CTIM_NSEC(&stbuf);
332 0 : attr->mode = stbuf.st_mode;
333 0 : attr->nlink = stbuf.st_nlink;
334 0 : attr->uid = stbuf.st_uid;
335 0 : attr->gid = stbuf.st_gid;
336 0 : attr->rdev = stbuf.st_rdev;
337 0 : attr->blksize = stbuf.st_blksize;
338 0 : attr->valid_ms = DEFAULT_TIMEOUT_MS;
339 :
340 0 : return 0;
341 : }
342 :
343 : static int
344 0 : utimensat_empty(struct aio_fsdev *vfsdev, struct spdk_fsdev_file_object *fobject,
345 : const struct timespec *tv)
346 : {
347 : int res;
348 :
349 0 : if (fobject->is_symlink) {
350 0 : res = utimensat(fobject->fd, "", tv, AT_EMPTY_PATH);
351 0 : if (res == -1 && errno == EINVAL) {
352 : /* Sorry, no race free way to set times on symlink. */
353 0 : errno = EPERM;
354 : }
355 : } else {
356 0 : res = utimensat(vfsdev->proc_self_fd, fobject->fd_str, tv, 0);
357 : }
358 :
359 0 : return res;
360 : }
361 :
362 : static void
363 0 : fsdev_free_leafs(struct spdk_fsdev_file_object *fobject, bool unref_fobject)
364 : {
365 0 : while (!TAILQ_EMPTY(&fobject->handles)) {
366 0 : struct spdk_fsdev_file_handle *fhandle = TAILQ_FIRST(&fobject->handles);
367 0 : file_handle_delete(fhandle);
368 : #ifdef __clang_analyzer__
369 : /*
370 : * scan-build fails to comprehend that file_handle_delete() removes the fhandle
371 : * from the queue, so it thinks it's remained accessible and throws the "Use of
372 : * memory after it is freed" error here.
373 : * The loop below "teaches" the scan-build that the freed fhandle is not on the
374 : * list anymore and supresses the error in this way.
375 : */
376 : struct spdk_fsdev_file_handle *tmp;
377 : TAILQ_FOREACH(tmp, &fobject->handles, link) {
378 : assert(tmp != fhandle);
379 : }
380 : #endif
381 : }
382 :
383 0 : while (!TAILQ_EMPTY(&fobject->leafs)) {
384 0 : struct spdk_fsdev_file_object *leaf_fobject = TAILQ_FIRST(&fobject->leafs);
385 0 : fsdev_free_leafs(leaf_fobject, true);
386 : }
387 :
388 0 : if (fobject->refcount && unref_fobject) {
389 : /* if still referenced - zero refcount */
390 0 : int res = file_object_unref(fobject, fobject->refcount);
391 0 : assert(res == 0);
392 : UNUSED(res);
393 : }
394 0 : }
395 :
396 : static int
397 0 : lo_getattr(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
398 : {
399 : int res;
400 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
401 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.getattr.fobject;
402 :
403 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
404 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
405 0 : return -EINVAL;
406 : }
407 :
408 0 : res = file_object_fill_attr(fobject, &fsdev_io->u_out.getattr.attr);
409 0 : if (res) {
410 0 : SPDK_ERRLOG("Cannot fill attr for " FOBJECT_FMT " (err=%d)\n", FOBJECT_ARGS(fobject), res);
411 0 : return res;
412 : }
413 :
414 0 : SPDK_DEBUGLOG(fsdev_aio, "GETATTR succeeded for " FOBJECT_FMT "\n", FOBJECT_ARGS(fobject));
415 0 : return 0;
416 : }
417 :
418 : static int
419 0 : lo_opendir(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
420 : {
421 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
422 : int error;
423 : int fd;
424 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.opendir.fobject;
425 0 : uint32_t flags = fsdev_io->u_in.opendir.flags;
426 0 : struct spdk_fsdev_file_handle *fhandle = NULL;
427 :
428 : UNUSED(flags);
429 :
430 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
431 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
432 0 : return -EINVAL;
433 : }
434 :
435 0 : fd = openat(fobject->fd, ".", O_RDONLY);
436 0 : if (fd == -1) {
437 0 : error = -errno;
438 0 : SPDK_ERRLOG("openat failed for " FOBJECT_FMT " (err=%d)\n", FOBJECT_ARGS(fobject), error);
439 0 : goto out_err;
440 : }
441 :
442 0 : fhandle = file_handle_create(fobject, fd);
443 0 : if (fhandle == NULL) {
444 0 : error = -ENOMEM;
445 0 : SPDK_ERRLOG("file_handle_create failed for " FOBJECT_FMT " (err=%d)\n", FOBJECT_ARGS(fobject),
446 : error);
447 0 : goto out_err;
448 : }
449 :
450 0 : fhandle->dir.dp = fdopendir(fd);
451 0 : if (fhandle->dir.dp == NULL) {
452 0 : error = -errno;
453 0 : SPDK_ERRLOG("fdopendir failed for " FOBJECT_FMT " (err=%d)\n", FOBJECT_ARGS(fobject), error);
454 0 : goto out_err;
455 : }
456 :
457 0 : fhandle->dir.offset = 0;
458 0 : fhandle->dir.entry = NULL;
459 :
460 0 : SPDK_DEBUGLOG(fsdev_aio, "OPENDIR succeeded for " FOBJECT_FMT " (fh=%p)\n",
461 : FOBJECT_ARGS(fobject), fhandle);
462 :
463 0 : fsdev_io->u_out.opendir.fhandle = fhandle;
464 :
465 0 : return 0;
466 :
467 0 : out_err:
468 0 : if (fhandle) {
469 0 : file_handle_delete(fhandle);
470 0 : } else if (fd != -1) {
471 0 : close(fd);
472 : }
473 :
474 0 : return error;
475 : }
476 :
477 : static int
478 0 : lo_releasedir(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
479 : {
480 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
481 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.releasedir.fobject;
482 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.releasedir.fhandle;
483 :
484 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
485 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
486 0 : return -EINVAL;
487 : }
488 :
489 0 : if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle)) {
490 0 : SPDK_ERRLOG("Invalid fhandle: %p\n", fhandle);
491 0 : return -EINVAL;
492 : }
493 :
494 0 : SPDK_DEBUGLOG(fsdev_aio, "RELEASEDIR succeeded for " FOBJECT_FMT " (fh=%p)\n",
495 : FOBJECT_ARGS(fobject), fhandle);
496 :
497 0 : file_handle_delete(fhandle);
498 :
499 0 : return 0;
500 : }
501 :
502 : static int
503 0 : lo_set_mount_opts(struct aio_fsdev *vfsdev, struct spdk_fsdev_mount_opts *opts)
504 : {
505 0 : assert(opts != NULL);
506 0 : assert(opts->opts_size != 0);
507 :
508 : UNUSED(vfsdev);
509 :
510 0 : if (opts->opts_size > offsetof(struct spdk_fsdev_mount_opts, max_write)) {
511 : /* Set the value the aio fsdev was created with */
512 0 : opts->max_write = vfsdev->mount_opts.max_write;
513 : }
514 :
515 0 : if (opts->opts_size > offsetof(struct spdk_fsdev_mount_opts, writeback_cache_enabled)) {
516 0 : if (vfsdev->mount_opts.writeback_cache_enabled) {
517 : /* The writeback_cache_enabled was enabled upon creation => we follow the opts */
518 0 : vfsdev->mount_opts.writeback_cache_enabled = opts->writeback_cache_enabled;
519 : } else {
520 : /* The writeback_cache_enabled was disabled upon creation => we reflect it in the opts */
521 0 : opts->writeback_cache_enabled = false;
522 : }
523 : }
524 :
525 : /* The AIO doesn't apply any additional restrictions, so we just accept the requested opts */
526 0 : SPDK_DEBUGLOG(fsdev_aio,
527 : "aio filesystem %s: opts updated: max_write=%" PRIu32 ", writeback_cache=%" PRIu8 "\n",
528 : vfsdev->fsdev.name, vfsdev->mount_opts.max_write, vfsdev->mount_opts.writeback_cache_enabled);
529 :
530 0 : return 0;
531 : }
532 :
533 : static int
534 0 : lo_mount(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
535 : {
536 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
537 0 : struct spdk_fsdev_mount_opts *in_opts = &fsdev_io->u_in.mount.opts;
538 :
539 0 : fsdev_io->u_out.mount.opts = *in_opts;
540 0 : lo_set_mount_opts(vfsdev, &fsdev_io->u_out.mount.opts);
541 0 : file_object_ref(vfsdev->root);
542 0 : fsdev_io->u_out.mount.root_fobject = vfsdev->root;
543 :
544 0 : return 0;
545 : }
546 :
547 : static int
548 0 : lo_umount(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
549 : {
550 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
551 :
552 0 : fsdev_free_leafs(vfsdev->root, false);
553 0 : file_object_unref(vfsdev->root, 1); /* reference by mount */
554 :
555 0 : return 0;
556 : }
557 :
558 : static int
559 0 : lo_do_lookup(struct aio_fsdev *vfsdev, struct spdk_fsdev_file_object *parent_fobject,
560 : const char *name, struct spdk_fsdev_file_object **pfobject,
561 : struct spdk_fsdev_file_attr *attr)
562 : {
563 : int newfd;
564 : int res;
565 0 : struct stat stat;
566 : struct spdk_fsdev_file_object *fobject;
567 :
568 : /* Do not allow escaping root directory */
569 0 : if (parent_fobject == vfsdev->root && strcmp(name, "..") == 0) {
570 0 : name = ".";
571 : }
572 :
573 0 : newfd = openat(parent_fobject->fd, name, O_PATH | O_NOFOLLOW);
574 0 : if (newfd == -1) {
575 0 : res = -errno;
576 0 : SPDK_DEBUGLOG(fsdev_aio, "openat( " FOBJECT_FMT " %s) failed with %d\n",
577 : FOBJECT_ARGS(parent_fobject), name, res);
578 0 : return res;
579 : }
580 :
581 0 : res = fstatat(newfd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
582 0 : if (res == -1) {
583 0 : res = -errno;
584 0 : SPDK_ERRLOG("fstatat(%s) failed with %d\n", name, res);
585 0 : close(newfd);
586 0 : return res;
587 : }
588 :
589 0 : spdk_spin_lock(&parent_fobject->lock);
590 0 : fobject = lo_find_leaf_unsafe(parent_fobject, stat.st_ino, stat.st_dev);
591 0 : if (fobject) {
592 0 : close(newfd);
593 0 : newfd = -1;
594 0 : file_object_ref(fobject); /* reference by a lo_do_lookup caller */
595 : } else {
596 0 : fobject = file_object_create_unsafe(parent_fobject, newfd, stat.st_ino, stat.st_dev, stat.st_mode);
597 : }
598 0 : spdk_spin_unlock(&parent_fobject->lock);
599 :
600 0 : if (!fobject) {
601 0 : SPDK_ERRLOG("Cannot create file object\n");
602 0 : close(newfd);
603 0 : return -ENOMEM;
604 : }
605 :
606 0 : if (attr) {
607 0 : res = file_object_fill_attr(fobject, attr);
608 0 : if (res) {
609 0 : SPDK_ERRLOG("fill_attr(%s) failed with %d\n", name, res);
610 0 : file_object_unref(fobject, 1);
611 0 : if (newfd != -1) {
612 0 : close(newfd);
613 : }
614 0 : return res;
615 : }
616 : }
617 :
618 0 : *pfobject = fobject;
619 :
620 0 : SPDK_DEBUGLOG(fsdev_aio, "lookup(%s) in dir " FOBJECT_FMT ": " FOBJECT_FMT " fd=%d\n",
621 : name, FOBJECT_ARGS(parent_fobject), FOBJECT_ARGS(fobject), fobject->fd);
622 0 : return 0;
623 : }
624 :
625 : static int
626 0 : lo_lookup(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
627 : {
628 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
629 : int err;
630 0 : struct spdk_fsdev_file_object *parent_fobject = fsdev_io->u_in.lookup.parent_fobject;
631 0 : char *name = fsdev_io->u_in.lookup.name;
632 :
633 0 : if (!parent_fobject) {
634 0 : err = file_object_fill_attr(vfsdev->root, &fsdev_io->u_out.lookup.attr);
635 0 : if (err) {
636 0 : SPDK_DEBUGLOG(fsdev_aio, "file_object_fill_attr(root) failed with err=%d\n", err);
637 0 : return err;
638 : }
639 :
640 0 : file_object_ref(vfsdev->root);
641 0 : fsdev_io->u_out.lookup.fobject = vfsdev->root;
642 0 : return 0;
643 : }
644 :
645 0 : SPDK_DEBUGLOG(fsdev_aio, " name %s\n", name);
646 :
647 : /* Don't use is_safe_path_component(), allow "." and ".." for NFS export
648 : * support.
649 : */
650 0 : if (strchr(name, '/')) {
651 0 : return -EINVAL;
652 : }
653 :
654 0 : err = lo_do_lookup(vfsdev, parent_fobject, name, &fsdev_io->u_out.lookup.fobject,
655 : &fsdev_io->u_out.lookup.attr);
656 0 : if (err) {
657 0 : SPDK_DEBUGLOG(fsdev_aio, "lo_do_lookup(%s) failed with err=%d\n", name, err);
658 0 : return err;
659 : }
660 :
661 0 : return 0;
662 : }
663 :
664 : /*
665 : * Change to uid/gid of caller so that file is created with ownership of caller.
666 : */
667 : static int
668 0 : lo_change_cred(const struct lo_cred *new, struct lo_cred *old)
669 : {
670 : int res;
671 :
672 0 : old->euid = geteuid();
673 0 : old->egid = getegid();
674 :
675 0 : res = syscall(SYS_setresgid, -1, new->egid, -1);
676 0 : if (res == -1) {
677 0 : return -errno;
678 : }
679 :
680 0 : res = syscall(SYS_setresuid, -1, new->euid, -1);
681 0 : if (res == -1) {
682 0 : int errno_save = -errno;
683 :
684 0 : syscall(SYS_setresgid, -1, old->egid, -1);
685 0 : return errno_save;
686 : }
687 :
688 0 : return 0;
689 : }
690 :
691 : /* Regain Privileges */
692 : static void
693 0 : lo_restore_cred(struct lo_cred *old)
694 : {
695 : int res;
696 :
697 0 : res = syscall(SYS_setresuid, -1, old->euid, -1);
698 0 : if (res == -1) {
699 0 : SPDK_ERRLOG("seteuid(%u)", old->euid);
700 : }
701 :
702 0 : res = syscall(SYS_setresgid, -1, old->egid, -1);
703 0 : if (res == -1) {
704 0 : SPDK_ERRLOG("setegid(%u)", old->egid);
705 : }
706 0 : }
707 :
708 : static int
709 0 : lo_readdir(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
710 : {
711 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
712 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.readdir.fobject;
713 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.readdir.fhandle;
714 0 : uint64_t offset = fsdev_io->u_in.readdir.offset;
715 :
716 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
717 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
718 0 : return -EINVAL;
719 : }
720 :
721 0 : if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle)) {
722 0 : SPDK_ERRLOG("Invalid fhandle: %p\n", fhandle);
723 0 : return -EINVAL;
724 : }
725 :
726 0 : if (((off_t)offset) != fhandle->dir.offset) {
727 0 : seekdir(fhandle->dir.dp, offset);
728 0 : fhandle->dir.entry = NULL;
729 0 : fhandle->dir.offset = offset;
730 : }
731 :
732 0 : while (1) {
733 : off_t nextoff;
734 : const char *name;
735 : int res;
736 :
737 0 : if (!fhandle->dir.entry) {
738 0 : errno = 0;
739 0 : fhandle->dir.entry = readdir(fhandle->dir.dp);
740 0 : if (!fhandle->dir.entry) {
741 0 : if (errno) { /* Error */
742 0 : res = -errno;
743 0 : SPDK_ERRLOG("readdir failed with err=%d", res);
744 0 : return res;
745 : } else { /* End of stream */
746 0 : break;
747 : }
748 : }
749 : }
750 :
751 0 : nextoff = fhandle->dir.entry->d_off;
752 0 : name = fhandle->dir.entry->d_name;
753 :
754 : /* Hide root's parent directory */
755 0 : if (fobject == vfsdev->root && strcmp(name, "..") == 0) {
756 0 : goto skip_entry;
757 : }
758 :
759 0 : if (is_dot_or_dotdot(name)) {
760 0 : fsdev_io->u_out.readdir.fobject = NULL;
761 0 : memset(&fsdev_io->u_out.readdir.attr, 0, sizeof(fsdev_io->u_out.readdir.attr));
762 0 : fsdev_io->u_out.readdir.attr.ino = fhandle->dir.entry->d_ino;
763 0 : fsdev_io->u_out.readdir.attr.mode = DT_DIR << 12;
764 0 : goto skip_lookup;
765 : }
766 :
767 0 : res = lo_do_lookup(vfsdev, fobject, name, &fsdev_io->u_out.readdir.fobject,
768 : &fsdev_io->u_out.readdir.attr);
769 0 : if (res) {
770 0 : SPDK_DEBUGLOG(fsdev_aio, "lo_do_lookup(%s) failed with err=%d\n", name, res);
771 0 : return res;
772 : }
773 :
774 0 : skip_lookup:
775 0 : fsdev_io->u_out.readdir.name = name;
776 0 : fsdev_io->u_out.readdir.offset = nextoff;
777 :
778 0 : res = fsdev_io->u_in.readdir.entry_cb_fn(fsdev_io, fsdev_io->internal.cb_arg);
779 0 : if (res) {
780 0 : if (fsdev_io->u_out.readdir.fobject) {
781 0 : file_object_unref(fsdev_io->u_out.readdir.fobject, 1);
782 : }
783 0 : break;
784 : }
785 :
786 0 : skip_entry:
787 0 : fhandle->dir.entry = NULL;
788 0 : fhandle->dir.offset = nextoff;
789 : }
790 :
791 0 : SPDK_DEBUGLOG(fsdev_aio, "READDIR succeeded for " FOBJECT_FMT " (fh=%p, offset=%" PRIu64 ")\n",
792 : FOBJECT_ARGS(fobject), fhandle, offset);
793 0 : return 0;
794 : }
795 :
796 : static int
797 0 : lo_forget(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
798 : {
799 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
800 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.readdir.fobject;
801 0 : uint64_t nlookup = fsdev_io->u_in.forget.nlookup;
802 :
803 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
804 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
805 0 : return -EINVAL;
806 : }
807 :
808 0 : file_object_unref(fobject, nlookup);
809 :
810 0 : return 0;
811 : }
812 :
813 : static uint32_t
814 0 : update_open_flags(struct aio_fsdev *vfsdev, uint32_t flags)
815 : {
816 : /*
817 : * With writeback cache, kernel may send read requests even
818 : * when userspace opened write-only
819 : */
820 0 : if (vfsdev->mount_opts.writeback_cache_enabled && (flags & O_ACCMODE) == O_WRONLY) {
821 0 : flags &= ~O_ACCMODE;
822 0 : flags |= O_RDWR;
823 : }
824 :
825 : /*
826 : * With writeback cache, O_APPEND is handled by the kernel.
827 : * This breaks atomicity (since the file may change in the
828 : * underlying filesystem, so that the kernel's idea of the
829 : * end of the file isn't accurate anymore). In this example,
830 : * we just accept that. A more rigorous filesystem may want
831 : * to return an error here
832 : */
833 0 : if (vfsdev->mount_opts.writeback_cache_enabled && (flags & O_APPEND)) {
834 0 : flags &= ~O_APPEND;
835 : }
836 :
837 : /*
838 : * O_DIRECT in guest should not necessarily mean bypassing page
839 : * cache on host as well. If somebody needs that behavior, it
840 : * probably should be a configuration knob in daemon.
841 : */
842 0 : flags &= ~O_DIRECT;
843 :
844 0 : return flags;
845 : }
846 :
847 : static int
848 0 : lo_open(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
849 : {
850 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
851 : int fd, saverr;
852 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.open.fobject;
853 0 : uint32_t flags = fsdev_io->u_in.open.flags;
854 : struct spdk_fsdev_file_handle *fhandle;
855 :
856 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
857 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
858 0 : return -EINVAL;
859 : }
860 :
861 0 : flags = update_open_flags(vfsdev, flags);
862 :
863 0 : fd = openat(vfsdev->proc_self_fd, fobject->fd_str, flags & ~O_NOFOLLOW);
864 0 : if (fd == -1) {
865 0 : saverr = -errno;
866 0 : SPDK_ERRLOG("openat(%d, %s, 0x%08" PRIx32 ") failed with err=%d\n",
867 : vfsdev->proc_self_fd, fobject->fd_str, flags, saverr);
868 0 : return saverr;
869 : }
870 :
871 0 : fhandle = file_handle_create(fobject, fd);
872 0 : if (!fhandle) {
873 0 : SPDK_ERRLOG("cannot create a file handle (fd=%d)\n", fd);
874 0 : close(fd);
875 0 : return -ENOMEM;
876 : }
877 :
878 0 : fsdev_io->u_out.open.fhandle = fhandle;
879 :
880 0 : SPDK_DEBUGLOG(fsdev_aio, "OPEN succeeded for " FOBJECT_FMT " (fh=%p, fd=%d)\n",
881 : FOBJECT_ARGS(fobject), fhandle, fd);
882 :
883 0 : return 0;
884 : }
885 :
886 : static int
887 0 : lo_flush(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
888 : {
889 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
890 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.flush.fobject;
891 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.flush.fhandle;
892 : int res, saverr;
893 :
894 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
895 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
896 0 : return -EINVAL;
897 : }
898 :
899 0 : if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle)) {
900 0 : SPDK_ERRLOG("Invalid fhandle: %p\n", fhandle);
901 0 : return -EINVAL;
902 : }
903 :
904 0 : res = close(dup(fhandle->fd));
905 0 : if (res) {
906 0 : saverr = -errno;
907 0 : SPDK_ERRLOG("close(dup(%d)) failed for " FOBJECT_FMT " (fh=%p, err=%d)\n",
908 : fhandle->fd, FOBJECT_ARGS(fobject), fhandle, saverr);
909 0 : return saverr;
910 : }
911 :
912 0 : SPDK_DEBUGLOG(fsdev_aio, "FLUSH succeeded for " FOBJECT_FMT " (fh=%p)\n", FOBJECT_ARGS(fobject),
913 : fhandle);
914 :
915 0 : return 0;
916 : }
917 :
918 : static int
919 0 : lo_setattr(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
920 : {
921 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
922 : int saverr;
923 : int res;
924 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.setattr.fobject;
925 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.setattr.fhandle;
926 0 : uint32_t to_set = fsdev_io->u_in.setattr.to_set;
927 0 : struct spdk_fsdev_file_attr *attr = &fsdev_io->u_in.setattr.attr;
928 :
929 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
930 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
931 0 : return -EINVAL;
932 : }
933 :
934 0 : if (to_set & FSDEV_SET_ATTR_MODE) {
935 0 : if (fhandle) {
936 0 : res = fchmod(fhandle->fd, attr->mode);
937 : } else {
938 0 : res = fchmodat(vfsdev->proc_self_fd, fobject->fd_str, attr->mode, 0);
939 : }
940 0 : if (res == -1) {
941 0 : saverr = -errno;
942 0 : SPDK_ERRLOG("fchmod failed for " FOBJECT_FMT "\n", FOBJECT_ARGS(fobject));
943 0 : return saverr;
944 : }
945 : }
946 :
947 0 : if (to_set & (FSDEV_SET_ATTR_UID | FSDEV_SET_ATTR_GID)) {
948 0 : uid_t uid = (to_set & FSDEV_SET_ATTR_UID) ? attr->uid : (uid_t) -1;
949 0 : gid_t gid = (to_set & FSDEV_SET_ATTR_GID) ? attr->gid : (gid_t) -1;
950 :
951 0 : res = fchownat(fobject->fd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
952 0 : if (res == -1) {
953 0 : saverr = -errno;
954 0 : SPDK_ERRLOG("fchownat failed for " FOBJECT_FMT "\n", FOBJECT_ARGS(fobject));
955 0 : return saverr;
956 : }
957 : }
958 :
959 0 : if (to_set & FSDEV_SET_ATTR_SIZE) {
960 : int truncfd;
961 :
962 0 : if (fhandle) {
963 0 : truncfd = fhandle->fd;
964 : } else {
965 0 : truncfd = openat(vfsdev->proc_self_fd, fobject->fd_str, O_RDWR);
966 0 : if (truncfd < 0) {
967 0 : saverr = -errno;
968 0 : SPDK_ERRLOG("openat failed for " FOBJECT_FMT "\n", FOBJECT_ARGS(fobject));
969 0 : return saverr;
970 : }
971 : }
972 :
973 0 : res = ftruncate(truncfd, attr->size);
974 0 : if (!fhandle) {
975 0 : saverr = -errno;
976 0 : close(truncfd);
977 0 : errno = saverr;
978 : }
979 0 : if (res == -1) {
980 0 : saverr = -errno;
981 0 : SPDK_ERRLOG("ftruncate failed for " FOBJECT_FMT " (size=%" PRIu64 ")\n", FOBJECT_ARGS(fobject),
982 : attr->size);
983 0 : return saverr;
984 : }
985 : }
986 :
987 0 : if (to_set & (FSDEV_SET_ATTR_ATIME | FSDEV_SET_ATTR_MTIME)) {
988 0 : struct timespec tv[2];
989 :
990 0 : tv[0].tv_sec = 0;
991 0 : tv[1].tv_sec = 0;
992 0 : tv[0].tv_nsec = UTIME_OMIT;
993 0 : tv[1].tv_nsec = UTIME_OMIT;
994 :
995 0 : if (to_set & FSDEV_SET_ATTR_ATIME_NOW) {
996 0 : tv[0].tv_nsec = UTIME_NOW;
997 0 : } else if (to_set & FSDEV_SET_ATTR_ATIME) {
998 0 : tv[0].tv_sec = attr->atime;
999 0 : tv[0].tv_nsec = attr->atimensec;
1000 : }
1001 :
1002 0 : if (to_set & FSDEV_SET_ATTR_MTIME_NOW) {
1003 0 : tv[1].tv_nsec = UTIME_NOW;
1004 0 : } else if (to_set & FSDEV_SET_ATTR_MTIME) {
1005 0 : tv[1].tv_sec = attr->mtime;
1006 0 : tv[1].tv_nsec = attr->mtimensec;
1007 : }
1008 :
1009 0 : if (fhandle) {
1010 0 : res = futimens(fhandle->fd, tv);
1011 : } else {
1012 0 : res = utimensat_empty(vfsdev, fobject, tv);
1013 : }
1014 0 : if (res == -1) {
1015 0 : saverr = -errno;
1016 0 : SPDK_ERRLOG("futimens/utimensat_empty failed for " FOBJECT_FMT "\n",
1017 : FOBJECT_ARGS(fobject));
1018 0 : return saverr;
1019 : }
1020 : }
1021 :
1022 0 : res = file_object_fill_attr(fobject, &fsdev_io->u_out.setattr.attr);
1023 0 : if (res) {
1024 0 : SPDK_ERRLOG("file_object_fill_attr failed for " FOBJECT_FMT "\n",
1025 : FOBJECT_ARGS(fobject));
1026 0 : return res;
1027 : }
1028 :
1029 0 : SPDK_DEBUGLOG(fsdev_aio, "SETATTR succeeded for " FOBJECT_FMT "\n",
1030 : FOBJECT_ARGS(fobject));
1031 :
1032 0 : return 0;
1033 : }
1034 :
1035 : static int
1036 0 : lo_create(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1037 : {
1038 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1039 : int fd;
1040 : int err;
1041 0 : struct spdk_fsdev_file_object *parent_fobject = fsdev_io->u_in.create.parent_fobject;
1042 0 : const char *name = fsdev_io->u_in.create.name;
1043 0 : uint32_t mode = fsdev_io->u_in.create.mode;
1044 0 : uint32_t flags = fsdev_io->u_in.create.flags;
1045 0 : uint32_t umask = fsdev_io->u_in.create.umask;
1046 0 : struct lo_cred old_cred, new_cred = {
1047 0 : .euid = fsdev_io->u_in.create.euid,
1048 0 : .egid = fsdev_io->u_in.create.egid,
1049 : };
1050 0 : struct spdk_fsdev_file_object *fobject;
1051 : struct spdk_fsdev_file_handle *fhandle;
1052 0 : struct spdk_fsdev_file_attr *attr = &fsdev_io->u_out.create.attr;
1053 :
1054 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, parent_fobject)) {
1055 0 : SPDK_ERRLOG("Invalid parent_fobject: %p\n", parent_fobject);
1056 0 : return -EINVAL;
1057 : }
1058 :
1059 : UNUSED(umask);
1060 :
1061 0 : if (!is_safe_path_component(name)) {
1062 0 : SPDK_ERRLOG("CREATE: %s not a safe component\n", name);
1063 0 : return -EINVAL;
1064 : }
1065 :
1066 0 : err = lo_change_cred(&new_cred, &old_cred);
1067 0 : if (err) {
1068 0 : SPDK_ERRLOG("CREATE: cannot change credentials\n");
1069 0 : return err;
1070 : }
1071 :
1072 0 : flags = update_open_flags(vfsdev, flags);
1073 :
1074 0 : fd = openat(parent_fobject->fd, name, (flags | O_CREAT) & ~O_NOFOLLOW, mode);
1075 0 : err = fd == -1 ? -errno : 0;
1076 0 : lo_restore_cred(&old_cred);
1077 :
1078 0 : if (err) {
1079 0 : SPDK_ERRLOG("CREATE: openat failed with %d\n", err);
1080 0 : return err;
1081 : }
1082 :
1083 0 : err = lo_do_lookup(vfsdev, parent_fobject, name, &fobject, attr);
1084 0 : if (err) {
1085 0 : SPDK_ERRLOG("CREATE: lookup failed with %d\n", err);
1086 0 : return err;
1087 : }
1088 :
1089 0 : fhandle = file_handle_create(fobject, fd);
1090 0 : if (!fhandle) {
1091 0 : SPDK_ERRLOG("cannot create a file handle (fd=%d)\n", fd);
1092 0 : close(fd);
1093 0 : file_object_unref(fobject, 1);
1094 0 : return -ENOMEM;
1095 : }
1096 :
1097 0 : SPDK_DEBUGLOG(fsdev_aio, "CREATE: succeeded (name=%s " FOBJECT_FMT " fh=%p)\n",
1098 : name, FOBJECT_ARGS(fobject), fhandle);
1099 :
1100 0 : fsdev_io->u_out.create.fobject = fobject;
1101 0 : fsdev_io->u_out.create.fhandle = fhandle;
1102 :
1103 0 : return 0;
1104 : }
1105 :
1106 : static int
1107 0 : lo_release(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1108 : {
1109 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1110 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.release.fobject;
1111 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.release.fhandle;
1112 :
1113 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1114 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1115 0 : return -EINVAL;
1116 : }
1117 :
1118 0 : if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle)) {
1119 0 : SPDK_ERRLOG("Invalid fhandle: %p\n", fhandle);
1120 0 : return -EINVAL;
1121 : }
1122 :
1123 0 : SPDK_DEBUGLOG(fsdev_aio, "RELEASE succeeded for " FOBJECT_FMT " fh=%p)\n",
1124 : FOBJECT_ARGS(fobject), fhandle);
1125 :
1126 0 : file_handle_delete(fhandle);
1127 :
1128 0 : return 0;
1129 : }
1130 :
1131 : static void
1132 0 : lo_read_cb(void *ctx, uint32_t data_size, int error)
1133 : {
1134 0 : struct spdk_fsdev_io *fsdev_io = ctx;
1135 0 : struct aio_fsdev_io *vfsdev_io = fsdev_to_aio_io(fsdev_io);
1136 :
1137 0 : if (vfsdev_io->aio) {
1138 0 : TAILQ_REMOVE(&vfsdev_io->ch->ios_in_progress, vfsdev_io, link);
1139 : }
1140 :
1141 0 : fsdev_io->u_out.read.data_size = data_size;
1142 :
1143 0 : spdk_fsdev_io_complete(fsdev_io, error);
1144 0 : }
1145 :
1146 : static int
1147 0 : lo_read(struct spdk_io_channel *_ch, struct spdk_fsdev_io *fsdev_io)
1148 : {
1149 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1150 0 : struct aio_io_channel *ch = spdk_io_channel_get_ctx(_ch);
1151 0 : struct aio_fsdev_io *vfsdev_io = fsdev_to_aio_io(fsdev_io);
1152 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.read.fobject;
1153 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.read.fhandle;
1154 0 : size_t size = fsdev_io->u_in.read.size;
1155 0 : uint64_t offs = fsdev_io->u_in.read.offs;
1156 0 : uint32_t flags = fsdev_io->u_in.read.flags;
1157 0 : struct iovec *outvec = fsdev_io->u_in.read.iov;
1158 0 : uint32_t outcnt = fsdev_io->u_in.read.iovcnt;
1159 :
1160 : /* we don't suport the memory domains at the moment */
1161 0 : assert(!fsdev_io->u_in.read.opts || !fsdev_io->u_in.read.opts->memory_domain);
1162 :
1163 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1164 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1165 0 : return -EINVAL;
1166 : }
1167 :
1168 0 : if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle)) {
1169 0 : SPDK_ERRLOG("Invalid fhandle: %p\n", fhandle);
1170 0 : return -EINVAL;
1171 : }
1172 :
1173 : UNUSED(flags);
1174 :
1175 0 : if (!outcnt || !outvec) {
1176 0 : SPDK_ERRLOG("bad outvec: iov=%p outcnt=%" PRIu32 "\n", outvec, outcnt);
1177 0 : return -EINVAL;
1178 : }
1179 :
1180 0 : if (vfsdev->skip_rw) {
1181 : uint32_t i;
1182 :
1183 0 : fsdev_io->u_out.read.data_size = 0;
1184 :
1185 0 : for (i = 0; i < outcnt; i++, outvec++) {
1186 0 : fsdev_io->u_out.read.data_size += outvec->iov_len;
1187 : }
1188 :
1189 0 : TAILQ_INSERT_TAIL(&ch->ios_to_complete, vfsdev_io, link);
1190 :
1191 0 : return IO_STATUS_ASYNC;
1192 : }
1193 :
1194 0 : vfsdev_io->aio = spdk_aio_mgr_read(ch->mgr, lo_read_cb, fsdev_io, fhandle->fd, offs, size, outvec,
1195 : outcnt);
1196 0 : if (vfsdev_io->aio) {
1197 0 : vfsdev_io->ch = ch;
1198 0 : TAILQ_INSERT_TAIL(&ch->ios_in_progress, vfsdev_io, link);
1199 : }
1200 :
1201 0 : return IO_STATUS_ASYNC;
1202 : }
1203 :
1204 : static void
1205 0 : lo_write_cb(void *ctx, uint32_t data_size, int error)
1206 : {
1207 0 : struct spdk_fsdev_io *fsdev_io = ctx;
1208 0 : struct aio_fsdev_io *vfsdev_io = fsdev_to_aio_io(fsdev_io);
1209 :
1210 0 : if (vfsdev_io->aio) {
1211 0 : TAILQ_REMOVE(&vfsdev_io->ch->ios_in_progress, vfsdev_io, link);
1212 : }
1213 :
1214 0 : fsdev_io->u_out.write.data_size = data_size;
1215 :
1216 0 : spdk_fsdev_io_complete(fsdev_io, error);
1217 0 : }
1218 :
1219 : static int
1220 0 : lo_write(struct spdk_io_channel *_ch, struct spdk_fsdev_io *fsdev_io)
1221 : {
1222 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1223 0 : struct aio_io_channel *ch = spdk_io_channel_get_ctx(_ch);
1224 0 : struct aio_fsdev_io *vfsdev_io = fsdev_to_aio_io(fsdev_io);
1225 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.write.fobject;
1226 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.write.fhandle;
1227 0 : size_t size = fsdev_io->u_in.write.size;
1228 0 : uint64_t offs = fsdev_io->u_in.write.offs;
1229 0 : uint32_t flags = fsdev_io->u_in.write.flags;
1230 0 : const struct iovec *invec = fsdev_io->u_in.write.iov;
1231 0 : uint32_t incnt = fsdev_io->u_in.write.iovcnt;
1232 :
1233 : /* we don't suport the memory domains at the moment */
1234 0 : assert(!fsdev_io->u_in.write.opts || !fsdev_io->u_in.write.opts->memory_domain);
1235 :
1236 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1237 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1238 0 : return -EINVAL;
1239 : }
1240 :
1241 0 : if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle)) {
1242 0 : SPDK_ERRLOG("Invalid fhandle: %p\n", fhandle);
1243 0 : return -EINVAL;
1244 : }
1245 :
1246 : UNUSED(flags);
1247 :
1248 0 : if (!incnt || !invec) { /* there should be at least one iovec with data */
1249 0 : SPDK_ERRLOG("bad invec: iov=%p cnt=%" PRIu32 "\n", invec, incnt);
1250 0 : return -EINVAL;
1251 : }
1252 :
1253 0 : if (vfsdev->skip_rw) {
1254 : uint32_t i;
1255 :
1256 0 : fsdev_io->u_out.write.data_size = 0;
1257 0 : for (i = 0; i < incnt; i++, invec++) {
1258 0 : fsdev_io->u_out.write.data_size += invec->iov_len;
1259 : }
1260 :
1261 0 : TAILQ_INSERT_TAIL(&ch->ios_to_complete, vfsdev_io, link);
1262 :
1263 0 : return IO_STATUS_ASYNC;
1264 : }
1265 :
1266 0 : vfsdev_io->aio = spdk_aio_mgr_write(ch->mgr, lo_write_cb, fsdev_io,
1267 : fhandle->fd, offs, size, invec, incnt);
1268 0 : if (vfsdev_io->aio) {
1269 0 : vfsdev_io->ch = ch;
1270 0 : TAILQ_INSERT_TAIL(&ch->ios_in_progress, vfsdev_io, link);
1271 : }
1272 :
1273 0 : return IO_STATUS_ASYNC;
1274 : }
1275 :
1276 : static int
1277 0 : lo_readlink(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1278 : {
1279 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1280 : int res;
1281 : char *buf;
1282 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.readlink.fobject;
1283 :
1284 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1285 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1286 0 : return -EINVAL;
1287 : }
1288 :
1289 0 : buf = malloc(PATH_MAX + 1);
1290 0 : if (!buf) {
1291 0 : SPDK_ERRLOG("malloc(%zu) failed\n", (size_t)(PATH_MAX + 1));
1292 0 : return -ENOMEM;
1293 : }
1294 :
1295 0 : res = readlinkat(fobject->fd, "", buf, PATH_MAX + 1);
1296 0 : if (res == -1) {
1297 0 : int saverr = -errno;
1298 0 : SPDK_ERRLOG("readlinkat failed for " FOBJECT_FMT " with %d\n",
1299 : FOBJECT_ARGS(fobject), saverr);
1300 0 : free(buf);
1301 0 : return saverr;
1302 : }
1303 :
1304 0 : if (((uint32_t)res) == PATH_MAX + 1) {
1305 0 : SPDK_ERRLOG("buffer is too short\n");
1306 0 : free(buf);
1307 0 : return -ENAMETOOLONG;
1308 : }
1309 :
1310 0 : buf[res] = 0;
1311 0 : fsdev_io->u_out.readlink.linkname = buf;
1312 :
1313 0 : return 0;
1314 : }
1315 :
1316 : static int
1317 0 : lo_statfs(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1318 : {
1319 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1320 : int res;
1321 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.statfs.fobject;
1322 0 : struct statvfs stbuf;
1323 :
1324 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1325 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1326 0 : return -EINVAL;
1327 : }
1328 :
1329 0 : res = fstatvfs(fobject->fd, &stbuf);
1330 0 : if (res == -1) {
1331 0 : int saverr = -errno;
1332 0 : SPDK_ERRLOG("fstatvfs failed with %d\n", saverr);
1333 0 : return saverr;
1334 : }
1335 :
1336 0 : fsdev_io->u_out.statfs.statfs.blocks = stbuf.f_blocks;
1337 0 : fsdev_io->u_out.statfs.statfs.bfree = stbuf.f_bfree;
1338 0 : fsdev_io->u_out.statfs.statfs.bavail = stbuf.f_bavail;
1339 0 : fsdev_io->u_out.statfs.statfs.files = stbuf.f_files;
1340 0 : fsdev_io->u_out.statfs.statfs.ffree = stbuf.f_ffree;
1341 0 : fsdev_io->u_out.statfs.statfs.bsize = stbuf.f_bsize;
1342 0 : fsdev_io->u_out.statfs.statfs.namelen = stbuf.f_namemax;
1343 0 : fsdev_io->u_out.statfs.statfs.frsize = stbuf.f_frsize;
1344 :
1345 0 : return 0;
1346 : }
1347 :
1348 : static int
1349 0 : lo_mknod_symlink(struct spdk_fsdev_io *fsdev_io, struct spdk_fsdev_file_object *parent_fobject,
1350 : const char *name, mode_t mode, dev_t rdev, const char *link, uid_t euid, gid_t egid,
1351 : struct spdk_fsdev_file_object **pfobject, struct spdk_fsdev_file_attr *attr)
1352 : {
1353 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1354 : int res;
1355 : int saverr;
1356 0 : struct lo_cred old_cred, new_cred = {
1357 : .euid = euid,
1358 : .egid = egid,
1359 : };
1360 :
1361 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, parent_fobject)) {
1362 0 : SPDK_ERRLOG("Invalid parent_fobject: %p\n", parent_fobject);
1363 0 : return -EINVAL;
1364 : }
1365 :
1366 0 : if (!is_safe_path_component(name)) {
1367 0 : SPDK_ERRLOG("%s isn'h safe\n", name);
1368 0 : return -EINVAL;
1369 : }
1370 :
1371 0 : res = lo_change_cred(&new_cred, &old_cred);
1372 0 : if (res) {
1373 0 : SPDK_ERRLOG("cannot change cred (err=%d)\n", res);
1374 0 : return res;
1375 : }
1376 :
1377 0 : if (S_ISDIR(mode)) {
1378 0 : res = mkdirat(parent_fobject->fd, name, mode);
1379 0 : } else if (S_ISLNK(mode)) {
1380 0 : if (link) {
1381 0 : res = symlinkat(link, parent_fobject->fd, name);
1382 : } else {
1383 0 : SPDK_ERRLOG("NULL link pointer\n");
1384 0 : errno = EINVAL;
1385 : }
1386 : } else {
1387 0 : res = mknodat(parent_fobject->fd, name, mode, rdev);
1388 : }
1389 0 : saverr = -errno;
1390 :
1391 0 : lo_restore_cred(&old_cred);
1392 :
1393 0 : if (res == -1) {
1394 0 : SPDK_ERRLOG("cannot mkdirat/symlinkat/mknodat (err=%d)\n", saverr);
1395 0 : return saverr;
1396 : }
1397 :
1398 0 : res = lo_do_lookup(vfsdev, parent_fobject, name, pfobject, attr);
1399 0 : if (res) {
1400 0 : SPDK_ERRLOG("lookup failed (err=%d)\n", res);
1401 0 : return res;
1402 : }
1403 :
1404 0 : SPDK_DEBUGLOG(fsdev_aio, "lo_mknod_symlink(" FOBJECT_FMT "/%s -> " FOBJECT_FMT "\n",
1405 : FOBJECT_ARGS(parent_fobject), name, FOBJECT_ARGS(*pfobject));
1406 :
1407 0 : return 0;
1408 : }
1409 :
1410 : static int
1411 0 : lo_mknod(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1412 : {
1413 0 : struct spdk_fsdev_file_object *parent_fobject = fsdev_io->u_in.mknod.parent_fobject;
1414 0 : char *name = fsdev_io->u_in.mknod.name;
1415 0 : mode_t mode = fsdev_io->u_in.mknod.mode;
1416 0 : dev_t rdev = fsdev_io->u_in.mknod.rdev;
1417 0 : uid_t euid = fsdev_io->u_in.mknod.euid;
1418 0 : gid_t egid = fsdev_io->u_in.mknod.egid;
1419 :
1420 0 : return lo_mknod_symlink(fsdev_io, parent_fobject, name, mode, rdev, NULL, euid, egid,
1421 : &fsdev_io->u_out.mknod.fobject, &fsdev_io->u_out.mknod.attr);
1422 : }
1423 :
1424 : static int
1425 0 : lo_mkdir(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1426 : {
1427 0 : struct spdk_fsdev_file_object *parent_fobject = fsdev_io->u_in.mkdir.parent_fobject;
1428 0 : char *name = fsdev_io->u_in.mkdir.name;
1429 0 : mode_t mode = fsdev_io->u_in.mkdir.mode;
1430 0 : uid_t euid = fsdev_io->u_in.mkdir.euid;
1431 0 : gid_t egid = fsdev_io->u_in.mkdir.egid;
1432 :
1433 0 : return lo_mknod_symlink(fsdev_io, parent_fobject, name, S_IFDIR | mode, 0, NULL, euid, egid,
1434 : &fsdev_io->u_out.mkdir.fobject, &fsdev_io->u_out.mkdir.attr);
1435 : }
1436 :
1437 : static int
1438 0 : lo_symlink(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1439 : {
1440 0 : struct spdk_fsdev_file_object *parent_fobject = fsdev_io->u_in.symlink.parent_fobject;
1441 0 : char *target = fsdev_io->u_in.symlink.target;
1442 0 : char *linkpath = fsdev_io->u_in.symlink.linkpath;
1443 0 : uid_t euid = fsdev_io->u_in.symlink.euid;
1444 0 : gid_t egid = fsdev_io->u_in.symlink.egid;
1445 :
1446 0 : return lo_mknod_symlink(fsdev_io, parent_fobject, target, S_IFLNK, 0, linkpath, euid, egid,
1447 : &fsdev_io->u_out.symlink.fobject, &fsdev_io->u_out.symlink.attr);
1448 : }
1449 :
1450 : static int
1451 0 : lo_do_unlink(struct aio_fsdev *vfsdev, struct spdk_fsdev_file_object *parent_fobject,
1452 : const char *name, bool is_dir)
1453 : {
1454 : /* fobject must be initialized to avoid a scan-build false positive */
1455 0 : struct spdk_fsdev_file_object *fobject = NULL;
1456 : int res;
1457 :
1458 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, parent_fobject)) {
1459 0 : SPDK_ERRLOG("Invalid parent_fobject: %p\n", parent_fobject);
1460 0 : return -EINVAL;
1461 : }
1462 :
1463 0 : if (!is_safe_path_component(name)) {
1464 0 : SPDK_ERRLOG("%s isn't safe\n", name);
1465 0 : return -EINVAL;
1466 : }
1467 :
1468 0 : res = lo_do_lookup(vfsdev, parent_fobject, name, &fobject, NULL);
1469 0 : if (res) {
1470 0 : SPDK_ERRLOG("can't find '%s' under " FOBJECT_FMT "\n", name, FOBJECT_ARGS(parent_fobject));
1471 0 : return -EIO;
1472 : }
1473 :
1474 0 : res = unlinkat(parent_fobject->fd, name, is_dir ? AT_REMOVEDIR : 0);
1475 0 : if (res) {
1476 0 : res = -errno;
1477 0 : SPDK_WARNLOG("unlinkat(" FOBJECT_FMT " %s) failed (err=%d)\n",
1478 : FOBJECT_ARGS(parent_fobject), name, res);
1479 : }
1480 :
1481 0 : file_object_unref(fobject, 1);
1482 0 : return res;
1483 : }
1484 :
1485 : static int
1486 0 : lo_unlink(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1487 : {
1488 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1489 0 : struct spdk_fsdev_file_object *parent_fobject = fsdev_io->u_in.unlink.parent_fobject;
1490 0 : char *name = fsdev_io->u_in.unlink.name;
1491 :
1492 0 : return lo_do_unlink(vfsdev, parent_fobject, name, false);
1493 : }
1494 :
1495 : static int
1496 0 : lo_rmdir(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1497 : {
1498 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1499 0 : struct spdk_fsdev_file_object *parent_fobject = fsdev_io->u_in.rmdir.parent_fobject;
1500 0 : char *name = fsdev_io->u_in.rmdir.name;
1501 :
1502 0 : return lo_do_unlink(vfsdev, parent_fobject, name, true);
1503 : }
1504 :
1505 : static int
1506 0 : lo_rename(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1507 : {
1508 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1509 : int res, saverr;
1510 : /* old_fobject must be initialized to avoid a scan-build false positive */
1511 0 : struct spdk_fsdev_file_object *old_fobject = NULL;
1512 0 : struct spdk_fsdev_file_object *parent_fobject = fsdev_io->u_in.rename.parent_fobject;
1513 0 : char *name = fsdev_io->u_in.rename.name;
1514 0 : struct spdk_fsdev_file_object *new_parent_fobject = fsdev_io->u_in.rename.new_parent_fobject;
1515 0 : char *new_name = fsdev_io->u_in.rename.new_name;
1516 0 : uint32_t flags = fsdev_io->u_in.rename.flags;
1517 :
1518 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, parent_fobject)) {
1519 0 : SPDK_ERRLOG("Invalid parent_fobject: %p\n", parent_fobject);
1520 0 : return -EINVAL;
1521 : }
1522 :
1523 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, new_parent_fobject)) {
1524 0 : SPDK_ERRLOG("Invalid new_parent_fobject: %p\n", new_parent_fobject);
1525 0 : return -EINVAL;
1526 : }
1527 :
1528 0 : if (!is_safe_path_component(name)) {
1529 0 : SPDK_ERRLOG("name '%s' isn't safe\n", name);
1530 0 : return -EINVAL;
1531 : }
1532 :
1533 0 : if (!is_safe_path_component(new_name)) {
1534 0 : SPDK_ERRLOG("newname '%s' isn't safe\n", new_name);
1535 0 : return -EINVAL;
1536 : }
1537 :
1538 0 : res = lo_do_lookup(vfsdev, parent_fobject, name, &old_fobject, NULL);
1539 0 : if (res) {
1540 0 : SPDK_ERRLOG("can't find '%s' under " FOBJECT_FMT "\n", name, FOBJECT_ARGS(parent_fobject));
1541 0 : return -EIO;
1542 : }
1543 :
1544 0 : saverr = 0;
1545 0 : if (flags) {
1546 : #ifndef SYS_renameat2
1547 : SPDK_ERRLOG("flags are not supported\n");
1548 : return -ENOTSUP;
1549 : #else
1550 0 : res = syscall(SYS_renameat2, parent_fobject->fd, name, new_parent_fobject->fd,
1551 : new_name, flags);
1552 0 : if (res == -1 && errno == ENOSYS) {
1553 0 : SPDK_ERRLOG("SYS_renameat2 returned ENOSYS\n");
1554 0 : saverr = -EINVAL;
1555 0 : } else if (res == -1) {
1556 0 : saverr = -errno;
1557 0 : SPDK_ERRLOG("SYS_renameat2 failed (err=%d))\n", saverr);
1558 : }
1559 : #endif
1560 : } else {
1561 0 : res = renameat(parent_fobject->fd, name, new_parent_fobject->fd, new_name);
1562 0 : if (res == -1) {
1563 0 : saverr = -errno;
1564 0 : SPDK_ERRLOG("renameat failed (err=%d)\n", saverr);
1565 : }
1566 : }
1567 :
1568 0 : file_object_unref(old_fobject, 1);
1569 :
1570 0 : return saverr;
1571 : }
1572 :
1573 : static int
1574 0 : linkat_empty_nofollow(struct aio_fsdev *vfsdev, struct spdk_fsdev_file_object *fobject, int dfd,
1575 : const char *name)
1576 : {
1577 : int res;
1578 :
1579 0 : if (fobject->is_symlink) {
1580 0 : res = linkat(fobject->fd, "", dfd, name, AT_EMPTY_PATH);
1581 0 : if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
1582 : /* Sorry, no race free way to hard-link a symlink. */
1583 0 : errno = EPERM;
1584 : }
1585 : } else {
1586 0 : res = linkat(vfsdev->proc_self_fd, fobject->fd_str, dfd, name, AT_SYMLINK_FOLLOW);
1587 : }
1588 :
1589 0 : return res;
1590 : }
1591 :
1592 : static int
1593 0 : lo_link(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1594 : {
1595 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1596 : int res;
1597 : int saverr;
1598 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.link.fobject;
1599 0 : struct spdk_fsdev_file_object *new_parent_fobject = fsdev_io->u_in.link.new_parent_fobject;
1600 0 : char *name = fsdev_io->u_in.link.name;
1601 :
1602 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1603 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1604 0 : return -EINVAL;
1605 : }
1606 :
1607 0 : if (!is_safe_path_component(name)) {
1608 0 : SPDK_ERRLOG("%s is not a safe component\n", name);
1609 0 : return -EINVAL;
1610 : }
1611 :
1612 0 : res = linkat_empty_nofollow(vfsdev, fobject, new_parent_fobject->fd, name);
1613 0 : if (res == -1) {
1614 0 : saverr = -errno;
1615 0 : SPDK_ERRLOG("linkat_empty_nofollow failed " FOBJECT_FMT " -> " FOBJECT_FMT " name=%s (err=%d)\n",
1616 : FOBJECT_ARGS(fobject), FOBJECT_ARGS(new_parent_fobject), name, saverr);
1617 0 : return saverr;
1618 : }
1619 :
1620 0 : res = lo_do_lookup(vfsdev, new_parent_fobject, name, &fsdev_io->u_out.link.fobject,
1621 : &fsdev_io->u_out.link.attr);
1622 0 : if (res) {
1623 0 : SPDK_ERRLOG("lookup failed (err=%d)\n", res);
1624 0 : return res;
1625 : }
1626 :
1627 0 : SPDK_DEBUGLOG(fsdev_aio, "LINK succeeded for " FOBJECT_FMT " -> " FOBJECT_FMT " name=%s\n",
1628 : FOBJECT_ARGS(fobject), FOBJECT_ARGS(fsdev_io->u_out.link.fobject), name);
1629 :
1630 0 : return 0;
1631 : }
1632 :
1633 : static int
1634 0 : lo_fsync(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1635 : {
1636 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1637 : int res, saverr, fd;
1638 0 : char *buf;
1639 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.fsync.fobject;
1640 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.fsync.fhandle;
1641 0 : bool datasync = fsdev_io->u_in.fsync.datasync;
1642 :
1643 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1644 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1645 0 : return -EINVAL;
1646 : }
1647 :
1648 0 : if (!fhandle) {
1649 0 : res = asprintf(&buf, "%i", fobject->fd);
1650 0 : if (res == -1) {
1651 0 : saverr = -errno;
1652 0 : SPDK_ERRLOG("asprintf failed (errno=%d)\n", saverr);
1653 0 : return saverr;
1654 : }
1655 :
1656 0 : fd = openat(vfsdev->proc_self_fd, buf, O_RDWR);
1657 0 : saverr = -errno;
1658 0 : free(buf);
1659 0 : if (fd == -1) {
1660 0 : SPDK_ERRLOG("openat failed (errno=%d)\n", saverr);
1661 0 : return saverr;
1662 : }
1663 : } else {
1664 0 : fd = fhandle->fd;
1665 : }
1666 :
1667 0 : if (datasync) {
1668 0 : res = fdatasync(fd);
1669 : } else {
1670 0 : res = fsync(fd);
1671 : }
1672 :
1673 0 : saverr = -errno;
1674 0 : if (!fhandle) {
1675 0 : close(fd);
1676 : }
1677 :
1678 0 : if (res == -1) {
1679 0 : SPDK_ERRLOG("fdatasync/fsync failed for " FOBJECT_FMT " fh=%p (err=%d)\n",
1680 : FOBJECT_ARGS(fobject), fhandle, saverr);
1681 0 : return saverr;
1682 : }
1683 :
1684 0 : SPDK_DEBUGLOG(fsdev_aio, "FSYNC succeeded for " FOBJECT_FMT " fh=%p\n",
1685 : FOBJECT_ARGS(fobject), fhandle);
1686 :
1687 0 : return 0;
1688 : }
1689 :
1690 : static int
1691 0 : lo_setxattr(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1692 : {
1693 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1694 : ssize_t ret;
1695 : int saverr;
1696 0 : int fd = -1;
1697 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.setxattr.fobject;
1698 0 : char *name = fsdev_io->u_in.setxattr.name;
1699 0 : char *value = fsdev_io->u_in.setxattr.value;
1700 0 : uint32_t size = fsdev_io->u_in.setxattr.size;
1701 0 : uint32_t flags = fsdev_io->u_in.setxattr.flags;
1702 :
1703 0 : if (!vfsdev->xattr_enabled) {
1704 0 : SPDK_INFOLOG(fsdev_aio, "xattr is disabled by config\n");
1705 0 : return -ENOSYS;
1706 : }
1707 :
1708 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1709 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1710 0 : return -EINVAL;
1711 : }
1712 :
1713 0 : if (fobject->is_symlink) {
1714 : /* Sorry, no race free way to removexattr on symlink. */
1715 0 : SPDK_ERRLOG("cannot set xattr for symlink\n");
1716 0 : return -EPERM;
1717 : }
1718 :
1719 0 : fd = openat(vfsdev->proc_self_fd, fobject->fd_str, O_RDWR);
1720 0 : if (fd < 0) {
1721 0 : saverr = -errno;
1722 0 : SPDK_ERRLOG("openat failed with errno=%d\n", saverr);
1723 0 : return saverr;
1724 : }
1725 :
1726 0 : ret = fsetxattr(fd, name, value, size, flags);
1727 0 : saverr = -errno;
1728 0 : close(fd);
1729 0 : if (ret == -1) {
1730 0 : if (saverr == -ENOTSUP) {
1731 0 : SPDK_INFOLOG(fsdev_aio, "flistxattr: extended attributes are not supported or disabled\n");
1732 : } else {
1733 0 : SPDK_ERRLOG("flistxattr failed with errno=%d\n", saverr);
1734 : }
1735 0 : return saverr;
1736 : }
1737 :
1738 0 : SPDK_DEBUGLOG(fsdev_aio,
1739 : "SETXATTR succeeded for " FOBJECT_FMT " name=%s value=%s size=%" PRIu32 "flags=0x%x" PRIx32 "\n",
1740 : FOBJECT_ARGS(fobject), name, value, size, flags);
1741 :
1742 0 : return 0;
1743 : }
1744 :
1745 : static int
1746 0 : lo_getxattr(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1747 : {
1748 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1749 : ssize_t ret;
1750 : int saverr;
1751 0 : int fd = -1;
1752 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.getxattr.fobject;
1753 0 : char *name = fsdev_io->u_in.getxattr.name;
1754 0 : void *buffer = fsdev_io->u_in.getxattr.buffer;
1755 0 : size_t size = fsdev_io->u_in.getxattr.size;
1756 :
1757 0 : if (!vfsdev->xattr_enabled) {
1758 0 : SPDK_INFOLOG(fsdev_aio, "xattr is disabled by config\n");
1759 0 : return -ENOSYS;
1760 : }
1761 :
1762 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1763 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1764 0 : return -EINVAL;
1765 : }
1766 :
1767 0 : if (fobject->is_symlink) {
1768 : /* Sorry, no race free way to getxattr on symlink. */
1769 0 : SPDK_ERRLOG("cannot get xattr for symlink\n");
1770 0 : return -EPERM;
1771 : }
1772 :
1773 0 : fd = openat(vfsdev->proc_self_fd, fobject->fd_str, O_RDWR);
1774 0 : if (fd < 0) {
1775 0 : saverr = -errno;
1776 0 : SPDK_ERRLOG("openat failed with errno=%d\n", saverr);
1777 0 : return saverr;
1778 : }
1779 :
1780 0 : ret = fgetxattr(fd, name, buffer, size);
1781 0 : saverr = -errno;
1782 0 : close(fd);
1783 0 : if (ret == -1) {
1784 0 : if (saverr == -ENODATA) {
1785 0 : SPDK_INFOLOG(fsdev_aio, "fgetxattr: no extended attribute '%s' found\n", name);
1786 0 : } else if (saverr == -ENOTSUP) {
1787 0 : SPDK_INFOLOG(fsdev_aio, "fgetxattr: extended attributes are not supported or disabled\n");
1788 : } else {
1789 0 : SPDK_ERRLOG("fgetxattr failed with errno=%d\n", saverr);
1790 : }
1791 0 : return saverr;
1792 : }
1793 :
1794 0 : fsdev_io->u_out.getxattr.value_size = ret;
1795 :
1796 0 : SPDK_DEBUGLOG(fsdev_aio,
1797 : "GETXATTR succeeded for " FOBJECT_FMT " name=%s value=%s value_size=%zd\n",
1798 : FOBJECT_ARGS(fobject), name, (char *)buffer, ret);
1799 :
1800 0 : return 0;
1801 : }
1802 :
1803 : static int
1804 0 : lo_listxattr(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1805 : {
1806 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1807 : ssize_t ret;
1808 : int saverr;
1809 0 : int fd = -1;
1810 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.listxattr.fobject;
1811 0 : char *buffer = fsdev_io->u_in.listxattr.buffer;
1812 0 : size_t size = fsdev_io->u_in.listxattr.size;
1813 :
1814 0 : if (!vfsdev->xattr_enabled) {
1815 0 : SPDK_INFOLOG(fsdev_aio, "xattr is disabled by config\n");
1816 0 : return -ENOSYS;
1817 : }
1818 :
1819 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1820 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1821 0 : return -EINVAL;
1822 : }
1823 :
1824 0 : if (fobject->is_symlink) {
1825 : /* Sorry, no race free way to listxattr on symlink. */
1826 0 : SPDK_ERRLOG("cannot list xattr for symlink\n");
1827 0 : return -EPERM;
1828 : }
1829 :
1830 0 : fd = openat(vfsdev->proc_self_fd, fobject->fd_str, O_RDONLY);
1831 0 : if (fd < 0) {
1832 0 : saverr = -errno;
1833 0 : SPDK_ERRLOG("openat failed with errno=%d\n", saverr);
1834 0 : return saverr;
1835 : }
1836 :
1837 0 : ret = flistxattr(fd, buffer, size);
1838 0 : saverr = -errno;
1839 0 : close(fd);
1840 0 : if (ret == -1) {
1841 0 : if (saverr == -ENOTSUP) {
1842 0 : SPDK_INFOLOG(fsdev_aio, "flistxattr: extended attributes are not supported or disabled\n");
1843 : } else {
1844 0 : SPDK_ERRLOG("flistxattr failed with errno=%d\n", saverr);
1845 : }
1846 0 : return saverr;
1847 : }
1848 :
1849 0 : fsdev_io->u_out.listxattr.data_size = ret;
1850 0 : fsdev_io->u_out.listxattr.size_only = (size == 0);
1851 :
1852 0 : SPDK_DEBUGLOG(fsdev_aio, "LISTXATTR succeeded for " FOBJECT_FMT " data_size=%zu\n",
1853 : FOBJECT_ARGS(fobject), ret);
1854 :
1855 0 : return 0;
1856 : }
1857 :
1858 : static int
1859 0 : lo_removexattr(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1860 : {
1861 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1862 : ssize_t ret;
1863 : int saverr;
1864 0 : int fd = -1;
1865 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.removexattr.fobject;
1866 0 : char *name = fsdev_io->u_in.removexattr.name;
1867 :
1868 0 : if (!vfsdev->xattr_enabled) {
1869 0 : SPDK_INFOLOG(fsdev_aio, "xattr is disabled by config\n");
1870 0 : return -ENOSYS;
1871 : }
1872 :
1873 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1874 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1875 0 : return -EINVAL;
1876 : }
1877 :
1878 0 : if (fobject->is_symlink) {
1879 : /* Sorry, no race free way to setxattr on symlink. */
1880 0 : SPDK_ERRLOG("cannot list xattr for symlink\n");
1881 0 : return -EPERM;
1882 : }
1883 :
1884 0 : fd = openat(vfsdev->proc_self_fd, fobject->fd_str, O_RDONLY);
1885 0 : if (fd < 0) {
1886 0 : saverr = -errno;
1887 0 : SPDK_ERRLOG("openat failed with errno=%d\n", saverr);
1888 0 : return saverr;
1889 : }
1890 :
1891 0 : ret = fremovexattr(fd, name);
1892 0 : saverr = -errno;
1893 0 : close(fd);
1894 0 : if (ret == -1) {
1895 0 : if (saverr == -ENODATA) {
1896 0 : SPDK_INFOLOG(fsdev_aio, "fremovexattr: no extended attribute '%s' found\n", name);
1897 0 : } else if (saverr == -ENOTSUP) {
1898 0 : SPDK_INFOLOG(fsdev_aio, "fremovexattr: extended attributes are not supported or disabled\n");
1899 : } else {
1900 0 : SPDK_ERRLOG("fremovexattr failed with errno=%d\n", saverr);
1901 : }
1902 0 : return saverr;
1903 : }
1904 :
1905 0 : SPDK_DEBUGLOG(fsdev_aio, "REMOVEXATTR succeeded for " FOBJECT_FMT " name=%s\n",
1906 : FOBJECT_ARGS(fobject), name);
1907 :
1908 0 : return 0;
1909 : }
1910 :
1911 : static int
1912 0 : lo_fsyncdir(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1913 : {
1914 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1915 : int res;
1916 0 : int saverr = 0;
1917 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.fsyncdir.fobject;
1918 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.fsyncdir.fhandle;
1919 0 : bool datasync = fsdev_io->u_in.fsyncdir.datasync;
1920 :
1921 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1922 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1923 0 : return -EINVAL;
1924 : }
1925 :
1926 0 : if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle)) {
1927 0 : SPDK_ERRLOG("Invalid fhandle: %p\n", fhandle);
1928 0 : return -EINVAL;
1929 : }
1930 :
1931 0 : if (datasync) {
1932 0 : res = fdatasync(fhandle->fd);
1933 : } else {
1934 0 : res = fsync(fhandle->fd);
1935 : }
1936 :
1937 0 : if (res == -1) {
1938 0 : saverr = -errno;
1939 0 : SPDK_ERRLOG("%s failed for fh=%p with err=%d\n",
1940 : datasync ? "fdatasync" : "fsync", fhandle, saverr);
1941 0 : return saverr;
1942 : }
1943 :
1944 0 : SPDK_DEBUGLOG(fsdev_aio, "FSYNCDIR succeeded for " FOBJECT_FMT " fh=%p datasync=%d\n",
1945 : FOBJECT_ARGS(fobject), fhandle, datasync);
1946 :
1947 0 : return 0;
1948 : }
1949 :
1950 : static int
1951 0 : lo_flock(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1952 : {
1953 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1954 : int res;
1955 0 : int saverr = 0;
1956 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.flock.fobject;
1957 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.flock.fhandle;
1958 0 : int operation = fsdev_io->u_in.flock.operation;
1959 :
1960 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1961 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1962 0 : return -EINVAL;
1963 : }
1964 :
1965 0 : if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle)) {
1966 0 : SPDK_ERRLOG("Invalid fhandle: %p\n", fhandle);
1967 0 : return -EINVAL;
1968 : }
1969 :
1970 0 : res = flock(fhandle->fd, operation | LOCK_NB);
1971 0 : if (res == -1) {
1972 0 : saverr = -errno;
1973 0 : SPDK_ERRLOG("flock failed for fh=%p with err=%d\n", fhandle, saverr);
1974 0 : return saverr;
1975 : }
1976 :
1977 0 : SPDK_DEBUGLOG(fsdev_aio, "FLOCK succeeded for " FOBJECT_FMT " fh=%p operation=%d\n",
1978 : FOBJECT_ARGS(fobject), fhandle, operation);
1979 :
1980 0 : return 0;
1981 : }
1982 :
1983 : static int
1984 0 : lo_fallocate(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
1985 : {
1986 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
1987 : int err;
1988 0 : struct spdk_fsdev_file_object *fobject = fsdev_io->u_in.fallocate.fobject;
1989 0 : struct spdk_fsdev_file_handle *fhandle = fsdev_io->u_in.fallocate.fhandle;
1990 0 : uint32_t mode = fsdev_io->u_in.fallocate.mode;
1991 0 : uint64_t offset = fsdev_io->u_in.fallocate.offset;
1992 0 : uint64_t length = fsdev_io->u_in.fallocate.length;
1993 :
1994 0 : if (!fsdev_aio_is_valid_fobject(vfsdev, fobject)) {
1995 0 : SPDK_ERRLOG("Invalid fobject: %p\n", fobject);
1996 0 : return -EINVAL;
1997 : }
1998 :
1999 0 : if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle)) {
2000 0 : SPDK_ERRLOG("Invalid fhandle: %p\n", fhandle);
2001 0 : return -EINVAL;
2002 : }
2003 :
2004 0 : if (mode) {
2005 0 : SPDK_ERRLOG("non-zero mode is not suppored\n");
2006 0 : return -EOPNOTSUPP;
2007 : }
2008 :
2009 0 : err = posix_fallocate(fhandle->fd, offset, length);
2010 0 : if (err) {
2011 0 : SPDK_ERRLOG("posix_fallocate failed for fh=%p with err=%d\n",
2012 : fhandle, err);
2013 : }
2014 :
2015 0 : SPDK_DEBUGLOG(fsdev_aio,
2016 : "FALLOCATE returns %d for " FOBJECT_FMT " fh=%p offset=%" PRIu64 " length=%" PRIu64 "\n",
2017 : err, FOBJECT_ARGS(fobject), fhandle, offset, length);
2018 0 : return err;
2019 : }
2020 :
/*
 * SPDK_FSDEV_IO_COPY_FILE_RANGE handler: copy `len` bytes from the input
 * handle at `off_in` to the output handle at `off_out` using
 * copy_file_range().
 *
 * Compiled to a stub returning -ENOSYS unless SPDK_CONFIG_COPY_FILE_RANGE is
 * defined. Otherwise returns 0 on success, -EINVAL when any of the four
 * objects/handles fails validation, or the negated errno of the system call.
 */
static int
lo_copy_file_range(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
{
#ifdef SPDK_CONFIG_COPY_FILE_RANGE
	struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev_io->fsdev);
	struct spdk_fsdev_file_object *fobject_in = fsdev_io->u_in.copy_file_range.fobject_in;
	struct spdk_fsdev_file_handle *fhandle_in = fsdev_io->u_in.copy_file_range.fhandle_in;
	struct spdk_fsdev_file_object *fobject_out = fsdev_io->u_in.copy_file_range.fobject_out;
	struct spdk_fsdev_file_handle *fhandle_out = fsdev_io->u_in.copy_file_range.fhandle_out;
	off_t off_in = fsdev_io->u_in.copy_file_range.off_in;
	off_t off_out = fsdev_io->u_in.copy_file_range.off_out;
	size_t len = fsdev_io->u_in.copy_file_range.len;
	uint32_t flags = fsdev_io->u_in.copy_file_range.flags;
	ssize_t copied;

	/* Validate the source side first, then the destination side. */
	if (!fsdev_aio_is_valid_fobject(vfsdev, fobject_in)) {
		SPDK_ERRLOG("Invalid fobject_in: %p\n", fobject_in);
		return -EINVAL;
	}

	if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle_in)) {
		SPDK_ERRLOG("Invalid fhandle_in: %p\n", fhandle_in);
		return -EINVAL;
	}

	if (!fsdev_aio_is_valid_fobject(vfsdev, fobject_out)) {
		SPDK_ERRLOG("Invalid fobject_out: %p\n", fobject_out);
		return -EINVAL;
	}

	if (!fsdev_aio_is_valid_fhandle(vfsdev, fhandle_out)) {
		SPDK_ERRLOG("Invalid fhandle_out: %p\n", fhandle_out);
		return -EINVAL;
	}

	copied = copy_file_range(fhandle_in->fd, &off_in, fhandle_out->fd, &off_out, len, flags);
	if (copied < 0) {
		int err = -errno;

		SPDK_ERRLOG("copy_file_range failed with err=%d\n", err);
		return err;
	}

	/* The offsets logged here are the local copies passed by address to the call above. */
	SPDK_DEBUGLOG(fsdev_aio,
		      "COPY_FILE_RANGE succeeded for " FOBJECT_FMT " fh=%p offset=%" PRIu64 " -> " FOBJECT_FMT
		      " fh=%p offset=%" PRIu64 " (len-%zu flags=0x%" PRIx32 ")\n",
		      FOBJECT_ARGS(fobject_in), fhandle_in, (uint64_t)off_in, FOBJECT_ARGS(fobject_out), fhandle_out,
		      (uint64_t)off_out, len, flags);

	return 0;
#else
	return -ENOSYS;
#endif
}
2075 :
2076 : static int
2077 0 : lo_abort(struct spdk_io_channel *_ch, struct spdk_fsdev_io *fsdev_io)
2078 : {
2079 0 : struct aio_io_channel *ch = spdk_io_channel_get_ctx(_ch);
2080 : struct aio_fsdev_io *vfsdev_io;
2081 0 : uint64_t unique_to_abort = fsdev_io->u_in.abort.unique_to_abort;
2082 :
2083 0 : TAILQ_FOREACH(vfsdev_io, &ch->ios_in_progress, link) {
2084 0 : struct spdk_fsdev_io *_fsdev_io = aio_to_fsdev_io(vfsdev_io);
2085 0 : if (spdk_fsdev_io_get_unique(_fsdev_io) == unique_to_abort) {
2086 0 : spdk_aio_mgr_cancel(ch->mgr, vfsdev_io->aio);
2087 0 : return 0;
2088 : }
2089 : }
2090 :
2091 0 : return 0;
2092 : }
2093 :
2094 : static int
2095 0 : aio_io_poll(void *arg)
2096 : {
2097 : struct aio_fsdev_io *vfsdev_io, *tmp;
2098 0 : struct aio_io_channel *ch = arg;
2099 0 : int res = SPDK_POLLER_IDLE;
2100 :
2101 0 : if (spdk_aio_mgr_poll(ch->mgr)) {
2102 0 : res = SPDK_POLLER_BUSY;
2103 : }
2104 :
2105 0 : TAILQ_FOREACH_SAFE(vfsdev_io, &ch->ios_to_complete, link, tmp) {
2106 0 : struct spdk_fsdev_io *fsdev_io = aio_to_fsdev_io(vfsdev_io);
2107 :
2108 0 : TAILQ_REMOVE(&ch->ios_to_complete, vfsdev_io, link);
2109 0 : spdk_fsdev_io_complete(fsdev_io, 0);
2110 0 : res = SPDK_POLLER_BUSY;
2111 : }
2112 :
2113 0 : return res;
2114 : }
2115 :
2116 : static int
2117 0 : aio_fsdev_create_cb(void *io_device, void *ctx_buf)
2118 : {
2119 0 : struct aio_io_channel *ch = ctx_buf;
2120 0 : struct spdk_thread *thread = spdk_get_thread();
2121 :
2122 0 : ch->mgr = spdk_aio_mgr_create(MAX_AIOS);
2123 0 : if (!ch->mgr) {
2124 0 : SPDK_ERRLOG("aoi manager init for failed (thread=%s)\n", spdk_thread_get_name(thread));
2125 0 : return -ENOMEM;
2126 : }
2127 :
2128 0 : ch->poller = SPDK_POLLER_REGISTER(aio_io_poll, ch, 0);
2129 0 : TAILQ_INIT(&ch->ios_in_progress);
2130 0 : TAILQ_INIT(&ch->ios_to_complete);
2131 :
2132 0 : SPDK_DEBUGLOG(fsdev_aio, "Created aio fsdev IO channel: thread %s, thread id %" PRIu64
2133 : "\n",
2134 : spdk_thread_get_name(thread), spdk_thread_get_id(thread));
2135 0 : return 0;
2136 : }
2137 :
2138 : static void
2139 0 : aio_fsdev_destroy_cb(void *io_device, void *ctx_buf)
2140 : {
2141 0 : struct aio_io_channel *ch = ctx_buf;
2142 0 : struct spdk_thread *thread = spdk_get_thread();
2143 :
2144 : UNUSED(thread);
2145 :
2146 0 : spdk_poller_unregister(&ch->poller);
2147 0 : spdk_aio_mgr_delete(ch->mgr);
2148 :
2149 0 : SPDK_DEBUGLOG(fsdev_aio, "Destroyed aio fsdev IO channel: thread %s, thread id %" PRIu64
2150 : "\n",
2151 : spdk_thread_get_name(thread), spdk_thread_get_id(thread));
2152 0 : }
2153 :
2154 : static int
2155 0 : fsdev_aio_initialize(void)
2156 : {
2157 : /*
2158 : * We need to pick some unique address as our "io device" - so just use the
2159 : * address of the global tailq.
2160 : */
2161 0 : spdk_io_device_register(&g_aio_fsdev_head,
2162 : aio_fsdev_create_cb, aio_fsdev_destroy_cb,
2163 : sizeof(struct aio_io_channel), "aio_fsdev");
2164 :
2165 0 : return 0;
2166 : }
2167 :
/*
 * Completion callback handed to spdk_io_device_unregister() by
 * fsdev_aio_finish(). Currently a no-op.
 */
static void
_fsdev_aio_finish_cb(void *arg)
{
	UNUSED(arg);

	/* @todo: handle async module fini */
	/* spdk_fsdev_module_fini_done(); */
}
2174 :
2175 : static void
2176 0 : fsdev_aio_finish(void)
2177 : {
2178 0 : spdk_io_device_unregister(&g_aio_fsdev_head, _fsdev_aio_finish_cb);
2179 0 : }
2180 :
2181 : static int
2182 0 : fsdev_aio_get_ctx_size(void)
2183 : {
2184 0 : return sizeof(struct aio_fsdev_io);
2185 : }
2186 :
2187 : static struct spdk_fsdev_module aio_fsdev_module = {
2188 : .name = "aio",
2189 : .module_init = fsdev_aio_initialize,
2190 : .module_fini = fsdev_aio_finish,
2191 : .get_ctx_size = fsdev_aio_get_ctx_size,
2192 : };
2193 :
2194 0 : SPDK_FSDEV_MODULE_REGISTER(aio, &aio_fsdev_module);
2195 :
2196 : static void
2197 0 : fsdev_aio_free(struct aio_fsdev *vfsdev)
2198 : {
2199 0 : if (vfsdev->proc_self_fd != -1) {
2200 0 : close(vfsdev->proc_self_fd);
2201 : }
2202 :
2203 0 : if (vfsdev->root) {
2204 0 : int destroyed = file_object_unref(vfsdev->root, 1);
2205 0 : assert(destroyed == 0);
2206 : UNUSED(destroyed);
2207 :
2208 : }
2209 :
2210 0 : free(vfsdev->fsdev.name);
2211 0 : free(vfsdev->root_path);
2212 :
2213 0 : free(vfsdev);
2214 0 : }
2215 :
2216 : static int
2217 0 : fsdev_aio_destruct(void *ctx)
2218 : {
2219 0 : struct aio_fsdev *vfsdev = ctx;
2220 :
2221 0 : TAILQ_REMOVE(&g_aio_fsdev_head, vfsdev, tailq);
2222 :
2223 0 : fsdev_free_leafs(vfsdev->root, true);
2224 0 : vfsdev->root = NULL;
2225 :
2226 0 : pthread_mutex_destroy(&vfsdev->mutex);
2227 :
2228 0 : fsdev_aio_free(vfsdev);
2229 0 : return 0;
2230 : }
2231 :
2232 : typedef int (*fsdev_op_handler_func)(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io);
2233 :
2234 : static fsdev_op_handler_func handlers[] = {
2235 : [SPDK_FSDEV_IO_MOUNT] = lo_mount,
2236 : [SPDK_FSDEV_IO_UMOUNT] = lo_umount,
2237 : [SPDK_FSDEV_IO_LOOKUP] = lo_lookup,
2238 : [SPDK_FSDEV_IO_FORGET] = lo_forget,
2239 : [SPDK_FSDEV_IO_GETATTR] = lo_getattr,
2240 : [SPDK_FSDEV_IO_SETATTR] = lo_setattr,
2241 : [SPDK_FSDEV_IO_READLINK] = lo_readlink,
2242 : [SPDK_FSDEV_IO_SYMLINK] = lo_symlink,
2243 : [SPDK_FSDEV_IO_MKNOD] = lo_mknod,
2244 : [SPDK_FSDEV_IO_MKDIR] = lo_mkdir,
2245 : [SPDK_FSDEV_IO_UNLINK] = lo_unlink,
2246 : [SPDK_FSDEV_IO_RMDIR] = lo_rmdir,
2247 : [SPDK_FSDEV_IO_RENAME] = lo_rename,
2248 : [SPDK_FSDEV_IO_LINK] = lo_link,
2249 : [SPDK_FSDEV_IO_OPEN] = lo_open,
2250 : [SPDK_FSDEV_IO_READ] = lo_read,
2251 : [SPDK_FSDEV_IO_WRITE] = lo_write,
2252 : [SPDK_FSDEV_IO_STATFS] = lo_statfs,
2253 : [SPDK_FSDEV_IO_RELEASE] = lo_release,
2254 : [SPDK_FSDEV_IO_FSYNC] = lo_fsync,
2255 : [SPDK_FSDEV_IO_SETXATTR] = lo_setxattr,
2256 : [SPDK_FSDEV_IO_GETXATTR] = lo_getxattr,
2257 : [SPDK_FSDEV_IO_LISTXATTR] = lo_listxattr,
2258 : [SPDK_FSDEV_IO_REMOVEXATTR] = lo_removexattr,
2259 : [SPDK_FSDEV_IO_FLUSH] = lo_flush,
2260 : [SPDK_FSDEV_IO_OPENDIR] = lo_opendir,
2261 : [SPDK_FSDEV_IO_READDIR] = lo_readdir,
2262 : [SPDK_FSDEV_IO_RELEASEDIR] = lo_releasedir,
2263 : [SPDK_FSDEV_IO_FSYNCDIR] = lo_fsyncdir,
2264 : [SPDK_FSDEV_IO_FLOCK] = lo_flock,
2265 : [SPDK_FSDEV_IO_CREATE] = lo_create,
2266 : [SPDK_FSDEV_IO_ABORT] = lo_abort,
2267 : [SPDK_FSDEV_IO_FALLOCATE] = lo_fallocate,
2268 : [SPDK_FSDEV_IO_COPY_FILE_RANGE] = lo_copy_file_range,
2269 : };
2270 :
2271 : static void
2272 0 : fsdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_fsdev_io *fsdev_io)
2273 : {
2274 : int status;
2275 0 : enum spdk_fsdev_io_type type = spdk_fsdev_io_get_type(fsdev_io);
2276 :
2277 0 : assert(type >= 0 && type < __SPDK_FSDEV_IO_LAST);
2278 :
2279 0 : status = handlers[type](ch, fsdev_io);
2280 0 : if (status != IO_STATUS_ASYNC) {
2281 0 : spdk_fsdev_io_complete(fsdev_io, status);
2282 : }
2283 0 : }
2284 :
2285 : static struct spdk_io_channel *
2286 0 : fsdev_aio_get_io_channel(void *ctx)
2287 : {
2288 0 : return spdk_get_io_channel(&g_aio_fsdev_head);
2289 : }
2290 :
2291 : static void
2292 0 : fsdev_aio_write_config_json(struct spdk_fsdev *fsdev, struct spdk_json_write_ctx *w)
2293 : {
2294 0 : struct aio_fsdev *vfsdev = fsdev_to_aio_fsdev(fsdev);
2295 :
2296 0 : spdk_json_write_object_begin(w);
2297 0 : spdk_json_write_named_string(w, "method", "fsdev_aio_create");
2298 0 : spdk_json_write_named_object_begin(w, "params");
2299 0 : spdk_json_write_named_string(w, "name", spdk_fsdev_get_name(&vfsdev->fsdev));
2300 0 : spdk_json_write_named_string(w, "root_path", vfsdev->root_path);
2301 0 : spdk_json_write_named_bool(w, "enable_xattr", vfsdev->xattr_enabled);
2302 0 : spdk_json_write_named_bool(w, "enable_writeback_cache",
2303 0 : !!vfsdev->mount_opts.writeback_cache_enabled);
2304 0 : spdk_json_write_named_uint32(w, "max_write", vfsdev->mount_opts.max_write);
2305 0 : spdk_json_write_named_bool(w, "skip_rw", vfsdev->skip_rw);
2306 0 : spdk_json_write_object_end(w); /* params */
2307 0 : spdk_json_write_object_end(w);
2308 0 : }
2309 :
2310 : static const struct spdk_fsdev_fn_table aio_fn_table = {
2311 : .destruct = fsdev_aio_destruct,
2312 : .submit_request = fsdev_aio_submit_request,
2313 : .get_io_channel = fsdev_aio_get_io_channel,
2314 : .write_config_json = fsdev_aio_write_config_json,
2315 : };
2316 :
2317 : static int
2318 0 : setup_root(struct aio_fsdev *vfsdev)
2319 : {
2320 : int fd, res;
2321 0 : struct stat stat;
2322 :
2323 0 : fd = open(vfsdev->root_path, O_PATH);
2324 0 : if (fd == -1) {
2325 0 : res = -errno;
2326 0 : SPDK_ERRLOG("Cannot open root %s (err=%d)\n", vfsdev->root_path, res);
2327 0 : return res;
2328 : }
2329 :
2330 0 : res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
2331 0 : if (res == -1) {
2332 0 : res = -errno;
2333 0 : SPDK_ERRLOG("Cannot get root fstatat of %s (err=%d)\n", vfsdev->root_path, res);
2334 0 : close(fd);
2335 0 : return res;
2336 : }
2337 :
2338 0 : vfsdev->root = file_object_create_unsafe(NULL, fd, stat.st_ino, stat.st_dev, stat.st_mode);
2339 0 : if (!vfsdev->root) {
2340 0 : SPDK_ERRLOG("Cannot alloc root\n");
2341 0 : close(fd);
2342 0 : return -ENOMEM;
2343 : }
2344 :
2345 0 : SPDK_INFOLOG(fsdev_aio, "root (%s) fd=%d\n", vfsdev->root_path, fd);
2346 0 : return 0;
2347 : }
2348 :
2349 : static int
2350 0 : setup_proc_self_fd(struct aio_fsdev *vfsdev)
2351 : {
2352 0 : vfsdev->proc_self_fd = open("/proc/self/fd", O_PATH);
2353 0 : if (vfsdev->proc_self_fd == -1) {
2354 0 : int saverr = -errno;
2355 0 : SPDK_ERRLOG("Failed to open procfs fd dir with %d\n", saverr);
2356 0 : return saverr;
2357 : }
2358 :
2359 0 : SPDK_DEBUGLOG(fsdev_aio, "procfs fd dir opened (fd=%d)\n", vfsdev->proc_self_fd);
2360 0 : return 0;
2361 : }
2362 :
2363 : void
2364 0 : spdk_fsdev_aio_get_default_opts(struct spdk_fsdev_aio_opts *opts)
2365 : {
2366 0 : assert(opts);
2367 :
2368 0 : memset(opts, 0, sizeof(*opts));
2369 :
2370 0 : opts->xattr_enabled = DEFAULT_XATTR_ENABLED;
2371 0 : opts->writeback_cache_enabled = DEFAULT_WRITEBACK_CACHE;
2372 0 : opts->max_write = DEFAULT_MAX_WRITE;
2373 0 : opts->skip_rw = DEFAULT_SKIP_RW;
2374 0 : }
2375 :
2376 : int
2377 0 : spdk_fsdev_aio_create(struct spdk_fsdev **fsdev, const char *name, const char *root_path,
2378 : const struct spdk_fsdev_aio_opts *opts)
2379 : {
2380 : struct aio_fsdev *vfsdev;
2381 : int rc;
2382 :
2383 0 : vfsdev = calloc(1, sizeof(*vfsdev));
2384 0 : if (!vfsdev) {
2385 0 : SPDK_ERRLOG("Could not allocate aio_fsdev\n");
2386 0 : return -ENOMEM;
2387 : }
2388 :
2389 0 : vfsdev->proc_self_fd = -1;
2390 :
2391 0 : vfsdev->fsdev.name = strdup(name);
2392 0 : if (!vfsdev->fsdev.name) {
2393 0 : SPDK_ERRLOG("Could not strdup fsdev name: %s\n", name);
2394 0 : fsdev_aio_free(vfsdev);
2395 0 : return -ENOMEM;
2396 : }
2397 :
2398 0 : vfsdev->root_path = strdup(root_path);
2399 0 : if (!vfsdev->root_path) {
2400 0 : SPDK_ERRLOG("Could not strdup root path: %s\n", root_path);
2401 0 : fsdev_aio_free(vfsdev);
2402 0 : return -ENOMEM;
2403 : }
2404 :
2405 0 : rc = setup_root(vfsdev);
2406 0 : if (rc) {
2407 0 : SPDK_ERRLOG("Could not setup root: %s (err=%d)\n", root_path, rc);
2408 0 : fsdev_aio_free(vfsdev);
2409 0 : return rc;
2410 : }
2411 :
2412 0 : rc = setup_proc_self_fd(vfsdev);
2413 0 : if (rc) {
2414 0 : SPDK_ERRLOG("Could not setup proc_self_fd (err=%d)\n", rc);
2415 0 : fsdev_aio_free(vfsdev);
2416 0 : return rc;
2417 : }
2418 :
2419 0 : if (opts->xattr_enabled) {
2420 0 : SPDK_ERRLOG("Extended attributes can only be enabled in Linux\n");
2421 0 : fsdev_aio_free(vfsdev);
2422 0 : return rc;
2423 : }
2424 :
2425 0 : vfsdev->xattr_enabled = opts->xattr_enabled;
2426 0 : vfsdev->fsdev.ctxt = vfsdev;
2427 0 : vfsdev->fsdev.fn_table = &aio_fn_table;
2428 0 : vfsdev->fsdev.module = &aio_fsdev_module;
2429 :
2430 0 : pthread_mutex_init(&vfsdev->mutex, NULL);
2431 :
2432 0 : rc = spdk_fsdev_register(&vfsdev->fsdev);
2433 0 : if (rc) {
2434 0 : fsdev_aio_free(vfsdev);
2435 0 : return rc;
2436 : }
2437 :
2438 0 : vfsdev->mount_opts.writeback_cache_enabled = DEFAULT_WRITEBACK_CACHE;
2439 0 : vfsdev->mount_opts.max_write = DEFAULT_MAX_WRITE;
2440 :
2441 0 : vfsdev->skip_rw = opts->skip_rw;
2442 :
2443 0 : *fsdev = &(vfsdev->fsdev);
2444 0 : TAILQ_INSERT_TAIL(&g_aio_fsdev_head, vfsdev, tailq);
2445 0 : SPDK_DEBUGLOG(fsdev_aio, "Created aio filesystem %s (xattr_enabled=%" PRIu8 " writeback_cache=%"
2446 : PRIu8 " max_write=%" PRIu32 " skip_rw=%" PRIu8 ")\n",
2447 : vfsdev->fsdev.name, vfsdev->xattr_enabled, vfsdev->mount_opts.writeback_cache_enabled,
2448 : vfsdev->mount_opts.max_write, vfsdev->skip_rw);
2449 0 : return rc;
2450 : }
2451 : void
2452 0 : spdk_fsdev_aio_delete(const char *name,
2453 : spdk_delete_aio_fsdev_complete cb_fn, void *cb_arg)
2454 : {
2455 : int rc;
2456 :
2457 0 : rc = spdk_fsdev_unregister_by_name(name, &aio_fsdev_module, cb_fn, cb_arg);
2458 0 : if (rc != 0) {
2459 0 : cb_fn(cb_arg, rc);
2460 : }
2461 :
2462 0 : SPDK_DEBUGLOG(fsdev_aio, "Deleted aio filesystem %s\n", name);
2463 0 : }
2464 :
2465 0 : SPDK_LOG_REGISTER_COMPONENT(fsdev_aio)
|