LCOV - code coverage report
Current view: top level - module/bdev/uring - bdev_uring.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 0 472 0.0 %
Date: 2024-11-17 05:47:43 Functions: 0 34 0.0 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2019 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  */
       5             : 
       6             : #include "bdev_uring.h"
       7             : 
       8             : #include "spdk/stdinc.h"
       9             : #include "spdk/config.h"
      10             : #include "spdk/barrier.h"
      11             : #include "spdk/bdev.h"
      12             : #include "spdk/env.h"
      13             : #include "spdk/fd.h"
      14             : #include "spdk/likely.h"
      15             : #include "spdk/thread.h"
      16             : #include "spdk/json.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : 
      20             : #include "spdk/log.h"
      21             : #include "spdk_internal/uring.h"
      22             : 
      23             : #ifdef SPDK_CONFIG_URING_ZNS
      24             : #include <linux/blkzoned.h>
      25             : #define SECTOR_SHIFT 9
      26             : #endif
      27             : 
      /* Zoned-device geometry cached for a uring bdev (filled only when zoned support is compiled in). */
      struct bdev_uring_zoned_dev {
              /* Zone count as reported by the BLKGETNRZONES ioctl. */
              uint64_t        num_zones;
              /* log2 of the zone size expressed in logical blocks. */
              uint32_t        zone_shift;
              /* Shift applied to a logical block number to obtain a 512-byte sector number. */
              uint32_t        lba_shift;
      };
      33             : 
      /* Per-channel context; it only points at the group channel that owns the ring. */
      struct bdev_uring_io_channel {
              struct bdev_uring_group_channel *group_ch;
      };
      37             : 
      38             : struct bdev_uring_group_channel {
      39             :         uint64_t                                io_inflight;
      40             :         uint64_t                                io_pending;
      41             :         struct spdk_poller                      *poller;
      42             :         struct io_uring                         uring;
      43             : };
      44             : 
      /* Per-I/O driver context stored in the bdev_io's driver_ctx area. */
      struct bdev_uring_task {
              /* Byte count the completion result is compared against. */
              uint64_t                        len;
              /* Channel the request was queued on. */
              struct bdev_uring_io_channel    *ch;
              /* List linkage; not used in the part of the file visible here. */
              TAILQ_ENTRY(bdev_uring_task)    link;
      };
      50             : 
      51             : struct bdev_uring {
      52             :         struct spdk_bdev        bdev;
      53             :         struct bdev_uring_zoned_dev     zd;
      54             :         char                    *filename;
      55             :         int                     fd;
      56             :         TAILQ_ENTRY(bdev_uring)  link;
      57             : };
      58             : 
      /* Module hooks and helper that are defined elsewhere in this file. */
      static int bdev_uring_init(void);
      static void bdev_uring_fini(void);
      static void uring_free_bdev(struct bdev_uring *uring);

      /* List of uring bdevs; bdev_uring_destruct() unlinks entries from it. */
      static TAILQ_HEAD(, bdev_uring) g_uring_bdev_head = TAILQ_HEAD_INITIALIZER(g_uring_bdev_head);

      /* Tunables whose uses fall outside this part of the file
       * (presumably ring depth and a per-poll reap limit -- confirm). */
      #define SPDK_URING_QUEUE_DEPTH 512
      #define MAX_EVENTS_PER_POLL 32
      66             : 
      67             : static int
      68           0 : bdev_uring_get_ctx_size(void)
      69             : {
      70           0 :         return sizeof(struct bdev_uring_task);
      71             : }
      72             : 
      73             : static struct spdk_bdev_module uring_if = {
      74             :         .name           = "uring",
      75             :         .module_init    = bdev_uring_init,
      76             :         .module_fini    = bdev_uring_fini,
      77             :         .get_ctx_size   = bdev_uring_get_ctx_size,
      78             : };
      79             : 
      80           0 : SPDK_BDEV_MODULE_REGISTER(uring, &uring_if)
      81             : 
      82             : static int
      83           0 : bdev_uring_open(struct bdev_uring *bdev)
      84             : {
      85           0 :         int fd;
      86             : 
      87           0 :         fd = open(bdev->filename, O_RDWR | O_DIRECT | O_NOATIME);
      88           0 :         if (fd < 0) {
      89             :                 /* Try without O_DIRECT for non-disk files */
      90           0 :                 fd = open(bdev->filename, O_RDWR | O_NOATIME);
      91           0 :                 if (fd < 0) {
      92           0 :                         SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
      93             :                                     bdev->filename, errno, spdk_strerror(errno));
      94           0 :                         bdev->fd = -1;
      95           0 :                         return -1;
      96             :                 }
      97           0 :         }
      98             : 
      99           0 :         bdev->fd = fd;
     100             : 
     101           0 :         return 0;
     102           0 : }
     103             : 
     104             : static int
     105           0 : bdev_uring_close(struct bdev_uring *bdev)
     106             : {
     107           0 :         int rc;
     108             : 
     109           0 :         if (bdev->fd == -1) {
     110           0 :                 return 0;
     111             :         }
     112             : 
     113           0 :         rc = close(bdev->fd);
     114           0 :         if (rc < 0) {
     115           0 :                 SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
     116             :                             bdev->fd, errno, spdk_strerror(errno));
     117           0 :                 return -1;
     118             :         }
     119             : 
     120           0 :         bdev->fd = -1;
     121             : 
     122           0 :         return 0;
     123           0 : }
     124             : 
     125             : static int64_t
     126           0 : bdev_uring_readv(struct bdev_uring *uring, struct spdk_io_channel *ch,
     127             :                  struct bdev_uring_task *uring_task,
     128             :                  struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
     129             : {
     130           0 :         struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
     131           0 :         struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
     132           0 :         struct io_uring_sqe *sqe;
     133             : 
     134           0 :         sqe = io_uring_get_sqe(&group_ch->uring);
     135           0 :         if (!sqe) {
     136           0 :                 SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n");
     137           0 :                 return -ENOMEM;
     138             :         }
     139             : 
     140           0 :         io_uring_prep_readv(sqe, uring->fd, iov, iovcnt, offset);
     141           0 :         io_uring_sqe_set_data(sqe, uring_task);
     142           0 :         uring_task->len = nbytes;
     143           0 :         uring_task->ch = uring_ch;
     144             : 
     145           0 :         SPDK_DEBUGLOG(uring, "read %d iovs size %lu to off: %#lx\n",
     146             :                       iovcnt, nbytes, offset);
     147             : 
     148           0 :         group_ch->io_pending++;
     149           0 :         return nbytes;
     150           0 : }
     151             : 
     152             : static int64_t
     153           0 : bdev_uring_writev(struct bdev_uring *uring, struct spdk_io_channel *ch,
     154             :                   struct bdev_uring_task *uring_task,
     155             :                   struct iovec *iov, int iovcnt, size_t nbytes, uint64_t offset)
     156             : {
     157           0 :         struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
     158           0 :         struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
     159           0 :         struct io_uring_sqe *sqe;
     160             : 
     161           0 :         sqe = io_uring_get_sqe(&group_ch->uring);
     162           0 :         if (!sqe) {
     163           0 :                 SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n");
     164           0 :                 return -ENOMEM;
     165             :         }
     166             : 
     167           0 :         io_uring_prep_writev(sqe, uring->fd, iov, iovcnt, offset);
     168           0 :         io_uring_sqe_set_data(sqe, uring_task);
     169           0 :         uring_task->len = nbytes;
     170           0 :         uring_task->ch = uring_ch;
     171             : 
     172           0 :         SPDK_DEBUGLOG(uring, "write %d iovs size %lu from off: %#lx\n",
     173             :                       iovcnt, nbytes, offset);
     174             : 
     175           0 :         group_ch->io_pending++;
     176           0 :         return nbytes;
     177           0 : }
     178             : 
     179             : static int
     180           0 : bdev_uring_destruct(void *ctx)
     181             : {
     182           0 :         struct bdev_uring *uring = ctx;
     183           0 :         int rc = 0;
     184             : 
     185           0 :         TAILQ_REMOVE(&g_uring_bdev_head, uring, link);
     186           0 :         rc = bdev_uring_close(uring);
     187           0 :         if (rc < 0) {
     188           0 :                 SPDK_ERRLOG("bdev_uring_close() failed\n");
     189           0 :         }
     190           0 :         spdk_io_device_unregister(uring, NULL);
     191           0 :         uring_free_bdev(uring);
     192           0 :         return rc;
     193           0 : }
     194             : 
     195             : static int
     196           0 : bdev_uring_reap(struct io_uring *ring, int max)
     197             : {
     198           0 :         int i, count, ret;
     199           0 :         struct io_uring_cqe *cqe;
     200           0 :         struct bdev_uring_task *uring_task;
     201           0 :         enum spdk_bdev_io_status status;
     202             : 
     203           0 :         count = 0;
     204           0 :         for (i = 0; i < max; i++) {
     205           0 :                 ret = io_uring_peek_cqe(ring, &cqe);
     206           0 :                 if (ret != 0) {
     207           0 :                         return ret;
     208             :                 }
     209             : 
     210           0 :                 if (cqe == NULL) {
     211           0 :                         return count;
     212             :                 }
     213             : 
     214           0 :                 uring_task = (struct bdev_uring_task *)cqe->user_data;
     215           0 :                 if (cqe->res != (signed)uring_task->len) {
     216           0 :                         status = SPDK_BDEV_IO_STATUS_FAILED;
     217           0 :                 } else {
     218           0 :                         status = SPDK_BDEV_IO_STATUS_SUCCESS;
     219             :                 }
     220             : 
     221           0 :                 uring_task->ch->group_ch->io_inflight--;
     222           0 :                 io_uring_cqe_seen(ring, cqe);
     223           0 :                 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(uring_task), status);
     224           0 :                 count++;
     225           0 :         }
     226             : 
     227           0 :         return count;
     228           0 : }
     229             : 
     230             : static int
     231           0 : bdev_uring_group_poll(void *arg)
     232             : {
     233           0 :         struct bdev_uring_group_channel *group_ch = arg;
     234           0 :         int to_complete, to_submit;
     235           0 :         int count, ret;
     236             : 
     237           0 :         to_submit = group_ch->io_pending;
     238             : 
     239           0 :         if (to_submit > 0) {
     240             :                 /* If there are I/O to submit, use io_uring_submit here.
     241             :                  * It will automatically call spdk_io_uring_enter appropriately. */
     242           0 :                 ret = io_uring_submit(&group_ch->uring);
     243           0 :                 if (ret < 0) {
     244           0 :                         return SPDK_POLLER_BUSY;
     245             :                 }
     246             : 
     247           0 :                 group_ch->io_pending = 0;
     248           0 :                 group_ch->io_inflight += to_submit;
     249           0 :         }
     250             : 
     251           0 :         to_complete = group_ch->io_inflight;
     252           0 :         count = 0;
     253           0 :         if (to_complete > 0) {
     254           0 :                 count = bdev_uring_reap(&group_ch->uring, to_complete);
     255           0 :         }
     256             : 
     257           0 :         if (count + to_submit > 0) {
     258           0 :                 return SPDK_POLLER_BUSY;
     259             :         } else {
     260           0 :                 return SPDK_POLLER_IDLE;
     261             :         }
     262           0 : }
     263             : 
     264             : static void
     265           0 : bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
     266             :                       bool success)
     267             : {
     268           0 :         int64_t ret = 0;
     269             : 
     270           0 :         if (!success) {
     271           0 :                 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     272           0 :                 return;
     273             :         }
     274             : 
     275           0 :         switch (bdev_io->type) {
     276             :         case SPDK_BDEV_IO_TYPE_READ:
     277           0 :                 ret = bdev_uring_readv((struct bdev_uring *)bdev_io->bdev->ctxt,
     278           0 :                                        ch,
     279           0 :                                        (struct bdev_uring_task *)bdev_io->driver_ctx,
     280           0 :                                        bdev_io->u.bdev.iovs,
     281           0 :                                        bdev_io->u.bdev.iovcnt,
     282           0 :                                        bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
     283           0 :                                        bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
     284           0 :                 break;
     285             :         case SPDK_BDEV_IO_TYPE_WRITE:
     286           0 :                 ret = bdev_uring_writev((struct bdev_uring *)bdev_io->bdev->ctxt,
     287           0 :                                         ch,
     288           0 :                                         (struct bdev_uring_task *)bdev_io->driver_ctx,
     289           0 :                                         bdev_io->u.bdev.iovs,
     290           0 :                                         bdev_io->u.bdev.iovcnt,
     291           0 :                                         bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
     292           0 :                                         bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
     293           0 :                 break;
     294             :         default:
     295           0 :                 SPDK_ERRLOG("Wrong io type\n");
     296           0 :                 break;
     297             :         }
     298             : 
     299           0 :         if (ret == -ENOMEM) {
     300           0 :                 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
     301           0 :         }
     302           0 : }
     303             : 
     304             : #ifdef SPDK_CONFIG_URING_ZNS
     /*
      * Read the first line of /sys/block/<basename(devname)>/<attr> into str.
      *
      * At most str_len - 1 characters are stored and spdk_str_chomp() is applied
      * to the result. Returns 0 on success, -ENOENT when the sysfs file cannot be
      * opened, and -EINVAL when an allocation fails or nothing could be read.
      */
     static int
     bdev_uring_read_sysfs_attr(const char *devname, const char *attr, char *str, int str_len)
     {
             char *devname_copy;
             char *sysfs_path;
             FILE *fp;
             int rc = 0;

             /* Work on a copy: basename() is allowed to modify its argument. */
             devname_copy = strdup(devname);
             if (devname_copy == NULL) {
                     return -EINVAL;
             }

             sysfs_path = spdk_sprintf_alloc("/sys/block/%s/%s", basename(devname_copy), attr);
             free(devname_copy);
             if (sysfs_path == NULL) {
                     return -EINVAL;
             }

             fp = fopen(sysfs_path, "r");
             /* The path is not needed once the open attempt has been made. */
             free(sysfs_path);
             if (fp == NULL) {
                     return -ENOENT;
             }

             if (fgets(str, str_len, fp) != NULL) {
                     spdk_str_chomp(str);
             } else {
                     rc = -EINVAL;
             }

             fclose(fp);
             return rc;
     }
     343             : 
     /*
      * Read a sysfs attribute of the device and parse it as a non-negative
      * base-10 integer.
      *
      * Returns 0 and stores the value in *val on success. Returns a negative
      * errno when the attribute cannot be read or cannot be parsed; *val is left
      * untouched in that case.
      */
     static int
     bdev_uring_read_sysfs_attr_long(const char *devname, const char *attr, long *val)
     {
             char str[128];
             long parsed;
             int ret;

             ret = bdev_uring_read_sysfs_attr(devname, attr, str, sizeof(str));
             if (ret) {
                     return ret;
             }

             /* spdk_strtol() reports failure as a negative errno; do not pass that on as a value. */
             parsed = spdk_strtol(str, 10);
             if (parsed < 0) {
                     return (int)parsed;
             }

             *val = parsed;

             return 0;
     }
     359             : 
     360             : static int
     361           0 : bdev_uring_fill_zone_type(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
     362             : {
     363           0 :         switch (zones_rep->type) {
     364             :         case BLK_ZONE_TYPE_CONVENTIONAL:
     365           0 :                 zone_info->type = SPDK_BDEV_ZONE_TYPE_CNV;
     366           0 :                 break;
     367             :         case BLK_ZONE_TYPE_SEQWRITE_REQ:
     368           0 :                 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
     369           0 :                 break;
     370             :         case BLK_ZONE_TYPE_SEQWRITE_PREF:
     371           0 :                 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWP;
     372           0 :                 break;
     373             :         default:
     374           0 :                 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", zones_rep->type);
     375           0 :                 return -EIO;
     376             :         }
     377           0 :         return 0;
     378           0 : }
     379             : 
     380             : static int
     381           0 : bdev_uring_fill_zone_state(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
     382             : {
     383           0 :         switch (zones_rep->cond) {
     384             :         case BLK_ZONE_COND_EMPTY:
     385           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
     386           0 :                 break;
     387             :         case BLK_ZONE_COND_IMP_OPEN:
     388           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
     389           0 :                 break;
     390             :         case BLK_ZONE_COND_EXP_OPEN:
     391           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
     392           0 :                 break;
     393             :         case BLK_ZONE_COND_CLOSED:
     394           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
     395           0 :                 break;
     396             :         case BLK_ZONE_COND_READONLY:
     397           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
     398           0 :                 break;
     399             :         case BLK_ZONE_COND_FULL:
     400           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_FULL;
     401           0 :                 break;
     402             :         case BLK_ZONE_COND_OFFLINE:
     403           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
     404           0 :                 break;
     405             :         case BLK_ZONE_COND_NOT_WP:
     406           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_NOT_WP;
     407           0 :                 break;
     408             :         default:
     409           0 :                 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", zones_rep->cond);
     410           0 :                 return -EIO;
     411             :         }
     412           0 :         return 0;
     413           0 : }
     414             : 
     415             : static int
     416           0 : bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
     417             : {
     418           0 :         struct bdev_uring *uring;
     419           0 :         struct blk_zone_range range;
     420           0 :         long unsigned zone_mgmt_op;
     421           0 :         uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
     422             : 
     423           0 :         uring = (struct bdev_uring *)bdev_io->bdev->ctxt;
     424             : 
     425           0 :         switch (bdev_io->u.zone_mgmt.zone_action) {
     426             :         case SPDK_BDEV_ZONE_RESET:
     427           0 :                 zone_mgmt_op = BLKRESETZONE;
     428           0 :                 break;
     429             :         case SPDK_BDEV_ZONE_OPEN:
     430           0 :                 zone_mgmt_op = BLKOPENZONE;
     431           0 :                 break;
     432             :         case SPDK_BDEV_ZONE_CLOSE:
     433           0 :                 zone_mgmt_op = BLKCLOSEZONE;
     434           0 :                 break;
     435             :         case SPDK_BDEV_ZONE_FINISH:
     436           0 :                 zone_mgmt_op = BLKFINISHZONE;
     437           0 :                 break;
     438             :         default:
     439           0 :                 return -EINVAL;
     440             :         }
     441             : 
     442           0 :         range.sector = (zone_id << uring->zd.lba_shift);
     443           0 :         range.nr_sectors = (uring->bdev.zone_size << uring->zd.lba_shift);
     444             : 
     445           0 :         if (ioctl(uring->fd, zone_mgmt_op, &range)) {
     446           0 :                 SPDK_ERRLOG("Ioctl BLKXXXZONE(%#x) failed errno: %d(%s)\n",
     447             :                             bdev_io->u.zone_mgmt.zone_action, errno, strerror(errno));
     448           0 :                 return -EINVAL;
     449             :         }
     450             : 
     451           0 :         spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
     452             : 
     453           0 :         return 0;
     454           0 : }
     455             : 
     456             : static int
     457           0 : bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
     458             : {
     459           0 :         struct bdev_uring *uring;
     460           0 :         struct blk_zone *zones;
     461           0 :         struct blk_zone_report *rep;
     462           0 :         struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
     463           0 :         size_t repsize;
     464           0 :         uint32_t i, shift;
     465           0 :         uint32_t num_zones = bdev_io->u.zone_mgmt.num_zones;
     466           0 :         uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
     467             : 
     468           0 :         uring = (struct bdev_uring *)bdev_io->bdev->ctxt;
     469           0 :         shift = uring->zd.lba_shift;
     470             : 
     471           0 :         if ((num_zones > uring->zd.num_zones) || !num_zones) {
     472           0 :                 return -EINVAL;
     473             :         }
     474             : 
     475           0 :         repsize = sizeof(struct blk_zone_report) + (sizeof(struct blk_zone) * num_zones);
     476           0 :         rep = (struct blk_zone_report *)malloc(repsize);
     477           0 :         if (!rep) {
     478           0 :                 return -ENOMEM;
     479             :         }
     480             : 
     481           0 :         zones = (struct blk_zone *)(rep + 1);
     482             : 
     483           0 :         while (num_zones && ((zone_id >> uring->zd.zone_shift) <= num_zones)) {
     484           0 :                 memset(rep, 0, repsize);
     485           0 :                 rep->sector = zone_id;
     486           0 :                 rep->nr_zones = num_zones;
     487             : 
     488           0 :                 if (ioctl(uring->fd, BLKREPORTZONE, rep)) {
     489           0 :                         SPDK_ERRLOG("Ioctl BLKREPORTZONE failed errno: %d(%s)\n",
     490             :                                     errno, strerror(errno));
     491           0 :                         free(rep);
     492           0 :                         return -EINVAL;
     493             :                 }
     494             : 
     495           0 :                 if (!rep->nr_zones) {
     496           0 :                         break;
     497             :                 }
     498             : 
     499           0 :                 for (i = 0; i < rep->nr_zones; i++) {
     500           0 :                         zone_info->zone_id = ((zones + i)->start >> shift);
     501           0 :                         zone_info->write_pointer = ((zones + i)->wp >> shift);
     502           0 :                         zone_info->capacity = ((zones + i)->capacity >> shift);
     503             : 
     504           0 :                         bdev_uring_fill_zone_state(zone_info, zones + i);
     505           0 :                         bdev_uring_fill_zone_type(zone_info, zones + i);
     506             : 
     507           0 :                         zone_id = ((zones + i)->start + (zones + i)->len) >> shift;
     508           0 :                         zone_info++;
     509           0 :                         num_zones--;
     510           0 :                 }
     511             :         }
     512             : 
     513           0 :         spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
     514           0 :         free(rep);
     515           0 :         return 0;
     516           0 : }
     517             : 
     518             : static int
     519           0 : bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
     520             : {
     521           0 :         char str[128];
     522           0 :         long int val = 0;
     523           0 :         uint32_t zinfo;
     524           0 :         int retval = -1;
     525             : 
     526           0 :         uring->bdev.zoned = false;
     527             : 
     528             :         /* Check if this is a zoned block device */
     529           0 :         if (bdev_uring_read_sysfs_attr(filename, "queue/zoned", str, sizeof(str))) {
     530           0 :                 SPDK_ERRLOG("Unable to open file %s/queue/zoned. errno: %d\n", filename, errno);
     531           0 :         } else if (strcmp(str, "host-aware") == 0 || strcmp(str, "host-managed") == 0) {
     532             :                 /* Only host-aware & host-managed zns devices */
     533           0 :                 uring->bdev.zoned = true;
     534             : 
     535           0 :                 if (ioctl(uring->fd, BLKGETNRZONES, &zinfo)) {
     536           0 :                         SPDK_ERRLOG("ioctl BLKNRZONES failed %d (%s)\n", errno, strerror(errno));
     537           0 :                         goto err_ret;
     538             :                 }
     539           0 :                 uring->zd.num_zones = zinfo;
     540             : 
     541           0 :                 if (ioctl(uring->fd, BLKGETZONESZ, &zinfo)) {
     542           0 :                         SPDK_ERRLOG("ioctl BLKGETZONESZ failed %d (%s)\n", errno, strerror(errno));
     543           0 :                         goto err_ret;
     544             :                 }
     545             : 
     546           0 :                 uring->zd.lba_shift = uring->bdev.required_alignment - SECTOR_SHIFT;
     547           0 :                 uring->bdev.zone_size = (zinfo >> uring->zd.lba_shift);
     548           0 :                 uring->zd.zone_shift = spdk_u32log2(zinfo >> uring->zd.lba_shift);
     549             : 
     550           0 :                 if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_open_zones", &val)) {
     551           0 :                         SPDK_ERRLOG("Failed to get max open zones %d (%s)\n", errno, strerror(errno));
     552           0 :                         goto err_ret;
     553             :                 }
     554           0 :                 uring->bdev.max_open_zones = uring->bdev.optimal_open_zones = (uint32_t)val;
     555             : 
     556           0 :                 if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_active_zones", &val)) {
     557           0 :                         SPDK_ERRLOG("Failed to get max active zones %d (%s)\n", errno, strerror(errno));
     558           0 :                         goto err_ret;
     559             :                 }
     560           0 :                 uring->bdev.max_active_zones = (uint32_t)val;
     561           0 :                 retval = 0;
     562           0 :         } else {
     563           0 :                 retval = 0;        /* queue/zoned=none */
     564             :         }
     565             : 
     566             : err_ret:
     567           0 :         return retval;
     568           0 : }
     569             : #else
     570             : /* No support for zoned devices */
     /* Stub used when zoned support is not compiled in: always fails with -1. */
     static int
     bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
     {
             (void)bdev_io;

             return -1;
     }
     576             : 
     /* Stub used when zoned support is not compiled in: always fails with -1. */
     static int
     bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
     {
             (void)bdev_io;

             return -1;
     }
     582             : 
     /* Stub used when zoned support is not compiled in: does nothing and reports success. */
     static int
     bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
     {
             (void)uring;
             (void)name;
             (void)filename;

             return 0;
     }
     588             : #endif
     589             : 
     590             : static int
     591           0 : _bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
     592             : {
     593             : 
     594           0 :         switch (bdev_io->type) {
     595             :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
     596           0 :                 return bdev_uring_zone_get_info(bdev_io);
     597             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
     598           0 :                 return bdev_uring_zone_management_op(bdev_io);
     599             :         /* Read and write operations must be performed on buffers aligned to
     600             :          * bdev->required_alignment. If user specified unaligned buffers,
     601             :          * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
     602             :         case SPDK_BDEV_IO_TYPE_READ:
     603             :         case SPDK_BDEV_IO_TYPE_WRITE:
     604           0 :                 spdk_bdev_io_get_buf(bdev_io, bdev_uring_get_buf_cb,
     605           0 :                                      bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
     606           0 :                 return 0;
     607             :         default:
     608           0 :                 return -1;
     609             :         }
     610           0 : }
     611             : 
     612             : static void
     613           0 : bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
     614             : {
     615           0 :         if (_bdev_uring_submit_request(ch, bdev_io) < 0) {
     616           0 :                 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     617           0 :         }
     618           0 : }
     619             : 
     620             : static bool
     621           0 : bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
     622             : {
     623           0 :         switch (io_type) {
     624             : #ifdef SPDK_CONFIG_URING_ZNS
     625             :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
     626             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
     627             : #endif
     628             :         case SPDK_BDEV_IO_TYPE_READ:
     629             :         case SPDK_BDEV_IO_TYPE_WRITE:
     630           0 :                 return true;
     631             :         default:
     632           0 :                 return false;
     633             :         }
     634           0 : }
     635             : 
     636             : static int
     637           0 : bdev_uring_create_cb(void *io_device, void *ctx_buf)
     638             : {
     639           0 :         struct bdev_uring_io_channel *ch = ctx_buf;
     640             : 
     641           0 :         ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&uring_if));
     642             : 
     643           0 :         return 0;
     644           0 : }
     645             : 
     646             : static void
     647           0 : bdev_uring_destroy_cb(void *io_device, void *ctx_buf)
     648             : {
     649           0 :         struct bdev_uring_io_channel *ch = ctx_buf;
     650             : 
     651           0 :         spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
     652           0 : }
     653             : 
     654             : static struct spdk_io_channel *
     655           0 : bdev_uring_get_io_channel(void *ctx)
     656             : {
     657           0 :         struct bdev_uring *uring = ctx;
     658             : 
     659           0 :         return spdk_get_io_channel(uring);
     660           0 : }
     661             : 
     662             : static int
     663           0 : bdev_uring_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
     664             : {
     665           0 :         struct bdev_uring *uring = ctx;
     666             : 
     667           0 :         spdk_json_write_named_object_begin(w, "uring");
     668             : 
     669           0 :         spdk_json_write_named_string(w, "filename", uring->filename);
     670             : 
     671           0 :         spdk_json_write_object_end(w);
     672             : 
     673           0 :         return 0;
     674           0 : }
     675             : 
     676             : static void
     677           0 : bdev_uring_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
     678             : {
     679           0 :         struct bdev_uring *uring = bdev->ctxt;
     680             : 
     681           0 :         spdk_json_write_object_begin(w);
     682             : 
     683           0 :         spdk_json_write_named_string(w, "method", "bdev_uring_create");
     684             : 
     685           0 :         spdk_json_write_named_object_begin(w, "params");
     686           0 :         spdk_json_write_named_string(w, "name", bdev->name);
     687           0 :         spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
     688           0 :         spdk_json_write_named_string(w, "filename", uring->filename);
     689           0 :         spdk_json_write_object_end(w);
     690             : 
     691           0 :         spdk_json_write_object_end(w);
     692           0 : }
     693             : 
     694             : static const struct spdk_bdev_fn_table uring_fn_table = {
     695             :         .destruct               = bdev_uring_destruct,
     696             :         .submit_request         = bdev_uring_submit_request,
     697             :         .io_type_supported      = bdev_uring_io_type_supported,
     698             :         .get_io_channel         = bdev_uring_get_io_channel,
     699             :         .dump_info_json         = bdev_uring_dump_info_json,
     700             :         .write_config_json      = bdev_uring_write_json_config,
     701             : };
     702             : 
     703             : static void
     704           0 : uring_free_bdev(struct bdev_uring *uring)
     705             : {
     706           0 :         if (uring == NULL) {
     707           0 :                 return;
     708             :         }
     709           0 :         free(uring->filename);
     710           0 :         free(uring->bdev.name);
     711           0 :         free(uring);
     712           0 : }
     713             : 
     714             : static int
     715           0 : bdev_uring_group_create_cb(void *io_device, void *ctx_buf)
     716             : {
     717           0 :         struct bdev_uring_group_channel *ch = ctx_buf;
     718             : 
     719             :         /* Do not use IORING_SETUP_IOPOLL until the Linux kernel can support not only
     720             :          * local devices but also devices attached from remote target */
     721           0 :         if (io_uring_queue_init(SPDK_URING_QUEUE_DEPTH, &ch->uring, 0) < 0) {
     722           0 :                 SPDK_ERRLOG("uring I/O context setup failure\n");
     723           0 :                 return -1;
     724             :         }
     725             : 
     726           0 :         ch->poller = SPDK_POLLER_REGISTER(bdev_uring_group_poll, ch, 0);
     727           0 :         return 0;
     728           0 : }
     729             : 
     730             : static void
     731           0 : bdev_uring_group_destroy_cb(void *io_device, void *ctx_buf)
     732             : {
     733           0 :         struct bdev_uring_group_channel *ch = ctx_buf;
     734             : 
     735           0 :         io_uring_queue_exit(&ch->uring);
     736             : 
     737           0 :         spdk_poller_unregister(&ch->poller);
     738           0 : }
     739             : 
     740             : struct spdk_bdev *
     741           0 : create_uring_bdev(const char *name, const char *filename, uint32_t block_size)
     742             : {
     743           0 :         struct bdev_uring *uring;
     744           0 :         uint32_t detected_block_size;
     745           0 :         uint64_t bdev_size;
     746           0 :         int rc;
     747             : 
     748           0 :         uring = calloc(1, sizeof(*uring));
     749           0 :         if (!uring) {
     750           0 :                 SPDK_ERRLOG("Unable to allocate enough memory for uring backend\n");
     751           0 :                 return NULL;
     752             :         }
     753             : 
     754           0 :         uring->filename = strdup(filename);
     755           0 :         if (!uring->filename) {
     756           0 :                 goto error_return;
     757             :         }
     758             : 
     759           0 :         if (bdev_uring_open(uring)) {
     760           0 :                 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, uring->fd, errno);
     761           0 :                 goto error_return;
     762             :         }
     763             : 
     764           0 :         bdev_size = spdk_fd_get_size(uring->fd);
     765             : 
     766           0 :         uring->bdev.name = strdup(name);
     767           0 :         if (!uring->bdev.name) {
     768           0 :                 goto error_return;
     769             :         }
     770           0 :         uring->bdev.product_name = "URING bdev";
     771           0 :         uring->bdev.module = &uring_if;
     772             : 
     773           0 :         uring->bdev.write_cache = 0;
     774             : 
     775           0 :         detected_block_size = spdk_fd_get_blocklen(uring->fd);
     776           0 :         if (block_size == 0) {
     777             :                 /* User did not specify block size - use autodetected block size. */
     778           0 :                 if (detected_block_size == 0) {
     779           0 :                         SPDK_ERRLOG("Block size could not be auto-detected\n");
     780           0 :                         goto error_return;
     781             :                 }
     782           0 :                 block_size = detected_block_size;
     783           0 :         } else {
     784           0 :                 if (block_size < detected_block_size) {
     785           0 :                         SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
     786             :                                     "auto-detected block size %" PRIu32 "\n",
     787             :                                     block_size, detected_block_size);
     788           0 :                         goto error_return;
     789           0 :                 } else if (detected_block_size != 0 && block_size != detected_block_size) {
     790           0 :                         SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
     791             :                                      "auto-detected block size %" PRIu32 "\n",
     792             :                                      block_size, detected_block_size);
     793           0 :                 }
     794             :         }
     795             : 
     796           0 :         if (block_size < 512) {
     797           0 :                 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
     798           0 :                 goto error_return;
     799             :         }
     800             : 
     801           0 :         if (!spdk_u32_is_pow2(block_size)) {
     802           0 :                 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
     803           0 :                 goto error_return;
     804             :         }
     805             : 
     806           0 :         uring->bdev.blocklen = block_size;
     807           0 :         uring->bdev.required_alignment = spdk_u32log2(block_size);
     808             : 
     809           0 :         rc = bdev_uring_check_zoned_support(uring, name, filename);
     810           0 :         if (rc) {
     811           0 :                 goto error_return;
     812             :         }
     813             : 
     814           0 :         if (bdev_size % uring->bdev.blocklen != 0) {
     815           0 :                 SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
     816             :                             bdev_size, uring->bdev.blocklen);
     817           0 :                 goto error_return;
     818             :         }
     819             : 
     820           0 :         uring->bdev.blockcnt = bdev_size / uring->bdev.blocklen;
     821           0 :         uring->bdev.ctxt = uring;
     822             : 
     823           0 :         uring->bdev.fn_table = &uring_fn_table;
     824             : 
     825           0 :         spdk_io_device_register(uring, bdev_uring_create_cb, bdev_uring_destroy_cb,
     826             :                                 sizeof(struct bdev_uring_io_channel),
     827           0 :                                 uring->bdev.name);
     828           0 :         rc = spdk_bdev_register(&uring->bdev);
     829           0 :         if (rc) {
     830           0 :                 spdk_io_device_unregister(uring, NULL);
     831           0 :                 goto error_return;
     832             :         }
     833             : 
     834           0 :         TAILQ_INSERT_TAIL(&g_uring_bdev_head, uring, link);
     835           0 :         return &uring->bdev;
     836             : 
     837             : error_return:
     838           0 :         bdev_uring_close(uring);
     839           0 :         uring_free_bdev(uring);
     840           0 :         return NULL;
     841           0 : }
     842             : 
     843             : struct delete_uring_bdev_ctx {
     844             :         spdk_delete_uring_complete cb_fn;
     845             :         void *cb_arg;
     846             : };
     847             : 
     848             : static void
     849           0 : uring_bdev_unregister_cb(void *arg, int bdeverrno)
     850             : {
     851           0 :         struct delete_uring_bdev_ctx *ctx = arg;
     852             : 
     853           0 :         ctx->cb_fn(ctx->cb_arg, bdeverrno);
     854           0 :         free(ctx);
     855           0 : }
     856             : 
     857             : void
     858           0 : delete_uring_bdev(const char *name, spdk_delete_uring_complete cb_fn, void *cb_arg)
     859             : {
     860           0 :         struct delete_uring_bdev_ctx *ctx;
     861           0 :         int rc;
     862             : 
     863           0 :         ctx = calloc(1, sizeof(*ctx));
     864           0 :         if (ctx == NULL) {
     865           0 :                 cb_fn(cb_arg, -ENOMEM);
     866           0 :                 return;
     867             :         }
     868             : 
     869           0 :         ctx->cb_fn = cb_fn;
     870           0 :         ctx->cb_arg = cb_arg;
     871           0 :         rc = spdk_bdev_unregister_by_name(name, &uring_if, uring_bdev_unregister_cb, ctx);
     872           0 :         if (rc != 0) {
     873           0 :                 uring_bdev_unregister_cb(ctx, rc);
     874           0 :         }
     875           0 : }
     876             : 
     877             : static int
     878           0 : bdev_uring_init(void)
     879             : {
     880           0 :         spdk_io_device_register(&uring_if, bdev_uring_group_create_cb, bdev_uring_group_destroy_cb,
     881             :                                 sizeof(struct bdev_uring_group_channel), "uring_module");
     882             : 
     883           0 :         return 0;
     884             : }
     885             : 
     886             : static void
     887           0 : bdev_uring_fini(void)
     888             : {
     889           0 :         spdk_io_device_unregister(&uring_if, NULL);
     890           0 : }
     891             : 
     892           0 : SPDK_LOG_REGISTER_COMPONENT(uring)

Generated by: LCOV version 1.15