LCOV - code coverage report
Current view: top level - module/bdev/raid - raid0.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 110 155 71.0 %
Date: 2024-07-13 20:56:32 Functions: 9 12 75.0 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2019 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "bdev_raid.h"
       8             : 
       9             : #include "spdk/env.h"
      10             : #include "spdk/thread.h"
      11             : #include "spdk/string.h"
      12             : #include "spdk/util.h"
      13             : 
      14             : #include "spdk/log.h"
      15             : 
      16             : /*
      17             :  * brief:
      18             :  * raid0_bdev_io_completion function is called by lower layers to notify raid
      19             :  * module that particular bdev_io is completed.
      20             :  * params:
      21             :  * bdev_io - pointer to bdev io submitted to lower layers, like child io
      22             :  * success - bdev_io status
      23             :  * cb_arg - function callback context (parent raid_bdev_io)
      24             :  * returns:
      25             :  * none
      26             :  */
      27             : static void
      28          17 : raid0_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
      29             : {
      30          17 :         struct raid_bdev_io *raid_io = cb_arg;
      31             : 
      32          17 :         spdk_bdev_free_io(bdev_io);
      33             : 
      34          17 :         if (success) {
      35          16 :                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
      36             :         } else {
      37           1 :                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
      38             :         }
      39          17 : }
      40             : 
      41             : static void raid0_submit_rw_request(struct raid_bdev_io *raid_io);
      42             : 
      43             : static void
      44           0 : _raid0_submit_rw_request(void *_raid_io)
      45             : {
      46           0 :         struct raid_bdev_io *raid_io = _raid_io;
      47             : 
      48           0 :         raid0_submit_rw_request(raid_io);
      49           0 : }
      50             : 
      51             : /*
      52             :  * brief:
      53             :  * raid0_submit_rw_request function is used to submit I/O to the correct
      54             :  * member disk for raid0 bdevs.
      55             :  * params:
      56             :  * raid_io
      57             :  * returns:
      58             :  * none
      59             :  */
      60             : static void
      61          17 : raid0_submit_rw_request(struct raid_bdev_io *raid_io)
      62             : {
      63          17 :         struct spdk_bdev_ext_io_opts    io_opts = {};
      64          17 :         struct raid_bdev_io_channel     *raid_ch = raid_io->raid_ch;
      65          17 :         struct raid_bdev                *raid_bdev = raid_io->raid_bdev;
      66             :         uint64_t                        pd_strip;
      67             :         uint32_t                        offset_in_strip;
      68             :         uint64_t                        pd_lba;
      69             :         uint64_t                        pd_blocks;
      70             :         uint8_t                         pd_idx;
      71          17 :         int                             ret = 0;
      72             :         uint64_t                        start_strip;
      73             :         uint64_t                        end_strip;
      74             :         struct raid_base_bdev_info      *base_info;
      75             :         struct spdk_io_channel          *base_ch;
      76             : 
      77          17 :         start_strip = raid_io->offset_blocks >> raid_bdev->strip_size_shift;
      78          17 :         end_strip = (raid_io->offset_blocks + raid_io->num_blocks - 1) >>
      79          17 :                     raid_bdev->strip_size_shift;
      80          17 :         if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) {
      81           0 :                 assert(false);
      82             :                 SPDK_ERRLOG("I/O spans strip boundary!\n");
      83             :                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
      84             :                 return;
      85             :         }
      86             : 
      87          17 :         pd_strip = start_strip / raid_bdev->num_base_bdevs;
      88          17 :         pd_idx = start_strip % raid_bdev->num_base_bdevs;
      89          17 :         offset_in_strip = raid_io->offset_blocks & (raid_bdev->strip_size - 1);
      90          17 :         pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
      91          17 :         pd_blocks = raid_io->num_blocks;
      92          17 :         base_info = &raid_bdev->base_bdev_info[pd_idx];
      93          17 :         if (base_info->desc == NULL) {
      94           0 :                 SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
      95           0 :                 assert(0);
      96             :         }
      97             : 
      98             :         /*
      99             :          * Submit child io to bdev layer with using base bdev descriptors, base
     100             :          * bdev lba, base bdev child io length in blocks, buffer, completion
     101             :          * function and function callback context
     102             :          */
     103          17 :         assert(raid_ch != NULL);
     104          17 :         base_ch = raid_bdev_channel_get_base_channel(raid_ch, pd_idx);
     105             : 
     106          17 :         io_opts.size = sizeof(io_opts);
     107          17 :         io_opts.memory_domain = raid_io->memory_domain;
     108          17 :         io_opts.memory_domain_ctx = raid_io->memory_domain_ctx;
     109          17 :         io_opts.metadata = raid_io->md_buf;
     110             : 
     111          17 :         if (raid_io->type == SPDK_BDEV_IO_TYPE_READ) {
     112           3 :                 ret = raid_bdev_readv_blocks_ext(base_info, base_ch,
     113             :                                                  raid_io->iovs, raid_io->iovcnt,
     114             :                                                  pd_lba, pd_blocks, raid0_bdev_io_completion,
     115             :                                                  raid_io, &io_opts);
     116          14 :         } else if (raid_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
     117          14 :                 ret = raid_bdev_writev_blocks_ext(base_info, base_ch,
     118             :                                                   raid_io->iovs, raid_io->iovcnt,
     119             :                                                   pd_lba, pd_blocks, raid0_bdev_io_completion,
     120             :                                                   raid_io, &io_opts);
     121             :         } else {
     122           0 :                 SPDK_ERRLOG("Recvd not supported io type %u\n", raid_io->type);
     123           0 :                 assert(0);
     124             :         }
     125             : 
     126          17 :         if (ret == -ENOMEM) {
     127           0 :                 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
     128             :                                         base_ch, _raid0_submit_rw_request);
     129          17 :         } else if (ret != 0) {
     130           0 :                 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
     131           0 :                 assert(false);
     132             :                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
     133             :         }
     134             : }
     135             : 
     136             : /* raid0 IO range */
     137             : struct raid_bdev_io_range {
     138             :         uint64_t        strip_size;
     139             :         uint64_t        start_strip_in_disk;
     140             :         uint64_t        end_strip_in_disk;
     141             :         uint64_t        start_offset_in_strip;
     142             :         uint64_t        end_offset_in_strip;
     143             :         uint8_t         start_disk;
     144             :         uint8_t         end_disk;
     145             :         uint8_t         n_disks_involved;
     146             : };
     147             : 
     148             : static inline void
     149         963 : _raid0_get_io_range(struct raid_bdev_io_range *io_range,
     150             :                     uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift,
     151             :                     uint64_t offset_blocks, uint64_t num_blocks)
     152             : {
     153             :         uint64_t        start_strip;
     154             :         uint64_t        end_strip;
     155             :         uint64_t        total_blocks;
     156             : 
     157         963 :         io_range->strip_size = strip_size;
     158         963 :         total_blocks = offset_blocks + num_blocks - (num_blocks > 0);
     159             : 
     160             :         /* The start and end strip index in raid0 bdev scope */
     161         963 :         start_strip = offset_blocks >> strip_size_shift;
     162         963 :         end_strip = total_blocks >> strip_size_shift;
     163         963 :         io_range->start_strip_in_disk = start_strip / num_base_bdevs;
     164         963 :         io_range->end_strip_in_disk = end_strip / num_base_bdevs;
     165             : 
     166             :         /* The first strip may have unaligned start LBA offset.
     167             :          * The end strip may have unaligned end LBA offset.
     168             :          * Strips between them certainly have aligned offset and length to boundaries.
     169             :          */
     170         963 :         io_range->start_offset_in_strip = offset_blocks % strip_size;
     171         963 :         io_range->end_offset_in_strip = total_blocks % strip_size;
     172             : 
     173             :         /* The base bdev indexes in which start and end strips are located */
     174         963 :         io_range->start_disk = start_strip % num_base_bdevs;
     175         963 :         io_range->end_disk = end_strip % num_base_bdevs;
     176             : 
     177             :         /* Calculate how many base_bdevs are involved in io operation.
     178             :          * Number of base bdevs involved is between 1 and num_base_bdevs.
     179             :          * It will be 1 if the first strip and last strip are the same one.
     180             :          */
     181         963 :         io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs);
     182         963 : }
     183             : 
     184             : static inline void
     185       17703 : _raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx,
     186             :                       uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk)
     187             : {
     188             :         uint64_t n_strips_in_disk;
     189             :         uint64_t start_offset_in_disk;
     190             :         uint64_t end_offset_in_disk;
     191             :         uint64_t offset_in_disk;
     192             :         uint64_t nblocks_in_disk;
     193             :         uint64_t start_strip_in_disk;
     194             :         uint64_t end_strip_in_disk;
     195             : 
     196       17703 :         start_strip_in_disk = io_range->start_strip_in_disk;
     197       17703 :         if (disk_idx < io_range->start_disk) {
     198        7380 :                 start_strip_in_disk += 1;
     199             :         }
     200             : 
     201       17703 :         end_strip_in_disk = io_range->end_strip_in_disk;
     202       17703 :         if (disk_idx > io_range->end_disk) {
     203        3483 :                 end_strip_in_disk -= 1;
     204             :         }
     205             : 
     206       17703 :         assert(end_strip_in_disk >= start_strip_in_disk);
     207       17703 :         n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1;
     208             : 
     209       17703 :         if (disk_idx == io_range->start_disk) {
     210         963 :                 start_offset_in_disk = io_range->start_offset_in_strip;
     211             :         } else {
     212       16740 :                 start_offset_in_disk = 0;
     213             :         }
     214             : 
     215       17703 :         if (disk_idx == io_range->end_disk) {
     216         963 :                 end_offset_in_disk = io_range->end_offset_in_strip;
     217             :         } else {
     218       16740 :                 end_offset_in_disk = io_range->strip_size - 1;
     219             :         }
     220             : 
     221       17703 :         offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size;
     222       17703 :         nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size
     223       17703 :                           + end_offset_in_disk - start_offset_in_disk + 1;
     224             : 
     225       17703 :         SPDK_DEBUGLOG(bdev_raid0,
     226             :                       "raid_bdev (strip_size 0x%" PRIx64 ") splits IO to base_bdev (%u) at (0x%" PRIx64 ", 0x%" PRIx64
     227             :                       ").\n",
     228             :                       io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk);
     229             : 
     230       17703 :         *_offset_in_disk = offset_in_disk;
     231       17703 :         *_nblocks_in_disk = nblocks_in_disk;
     232       17703 : }
     233             : 
     234             : static void raid0_submit_null_payload_request(struct raid_bdev_io *raid_io);
     235             : 
     236             : static void
     237           0 : _raid0_submit_null_payload_request(void *_raid_io)
     238             : {
     239           0 :         struct raid_bdev_io *raid_io = _raid_io;
     240             : 
     241           0 :         raid0_submit_null_payload_request(raid_io);
     242           0 : }
     243             : 
     244             : static void
     245       17703 : raid0_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
     246             : {
     247       17703 :         struct raid_bdev_io *raid_io = cb_arg;
     248             : 
     249       17703 :         raid_bdev_io_complete_part(raid_io, 1, success ?
     250             :                                    SPDK_BDEV_IO_STATUS_SUCCESS :
     251             :                                    SPDK_BDEV_IO_STATUS_FAILED);
     252             : 
     253       17703 :         spdk_bdev_free_io(bdev_io);
     254       17703 : }
     255             : 
     256             : /*
     257             :  * brief:
     258             :  * raid0_submit_null_payload_request function submits the next batch of
     259             :  * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
     260             :  * it will submit as many as possible unless one base io request fails with -ENOMEM,
     261             :  * in which case it will queue itself for later submission.
     262             :  * params:
     263             :  * bdev_io - pointer to parent bdev_io on raid bdev device
     264             :  * returns:
     265             :  * none
     266             :  */
     267             : static void
     268         963 : raid0_submit_null_payload_request(struct raid_bdev_io *raid_io)
     269             : {
     270             :         struct raid_bdev                *raid_bdev;
     271         963 :         struct raid_bdev_io_range       io_range;
     272             :         int                             ret;
     273             :         struct raid_base_bdev_info      *base_info;
     274             :         struct spdk_io_channel          *base_ch;
     275             : 
     276         963 :         raid_bdev = raid_io->raid_bdev;
     277             : 
     278         963 :         _raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs,
     279         963 :                             raid_bdev->strip_size, raid_bdev->strip_size_shift,
     280             :                             raid_io->offset_blocks, raid_io->num_blocks);
     281             : 
     282         963 :         if (raid_io->base_bdev_io_remaining == 0) {
     283         963 :                 raid_io->base_bdev_io_remaining = io_range.n_disks_involved;
     284             :         }
     285             : 
     286       18666 :         while (raid_io->base_bdev_io_submitted < io_range.n_disks_involved) {
     287             :                 uint8_t disk_idx;
     288       17703 :                 uint64_t offset_in_disk;
     289       17703 :                 uint64_t nblocks_in_disk;
     290             : 
     291             :                 /* base_bdev is started from start_disk to end_disk.
     292             :                  * It is possible that index of start_disk is larger than end_disk's.
     293             :                  */
     294       17703 :                 disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs;
     295       17703 :                 base_info = &raid_bdev->base_bdev_info[disk_idx];
     296       17703 :                 base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, disk_idx);
     297             : 
     298       17703 :                 _raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk);
     299             : 
     300       17703 :                 switch (raid_io->type) {
     301       17703 :                 case SPDK_BDEV_IO_TYPE_UNMAP:
     302       17703 :                         ret = raid_bdev_unmap_blocks(base_info, base_ch,
     303             :                                                      offset_in_disk, nblocks_in_disk,
     304             :                                                      raid0_base_io_complete, raid_io);
     305       17703 :                         break;
     306             : 
     307           0 :                 case SPDK_BDEV_IO_TYPE_FLUSH:
     308           0 :                         ret = raid_bdev_flush_blocks(base_info, base_ch,
     309             :                                                      offset_in_disk, nblocks_in_disk,
     310             :                                                      raid0_base_io_complete, raid_io);
     311           0 :                         break;
     312             : 
     313           0 :                 default:
     314           0 :                         SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", raid_io->type);
     315           0 :                         assert(false);
     316             :                         ret = -EIO;
     317             :                 }
     318             : 
     319       17703 :                 if (ret == 0) {
     320       17703 :                         raid_io->base_bdev_io_submitted++;
     321           0 :                 } else if (ret == -ENOMEM) {
     322           0 :                         raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
     323             :                                                 base_ch, _raid0_submit_null_payload_request);
     324           0 :                         return;
     325             :                 } else {
     326           0 :                         SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
     327           0 :                         assert(false);
     328             :                         raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
     329             :                         return;
     330             :                 }
     331             :         }
     332             : }
     333             : 
     334             : static int
     335          19 : raid0_start(struct raid_bdev *raid_bdev)
     336             : {
     337          19 :         uint64_t min_blockcnt = UINT64_MAX;
     338             :         uint64_t base_bdev_data_size;
     339             :         struct raid_base_bdev_info *base_info;
     340             : 
     341         627 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
     342             :                 /* Calculate minimum block count from all base bdevs */
     343         608 :                 min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
     344             :         }
     345             : 
     346          19 :         base_bdev_data_size = (min_blockcnt >> raid_bdev->strip_size_shift) << raid_bdev->strip_size_shift;
     347             : 
     348         627 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
     349         608 :                 base_info->data_size = base_bdev_data_size;
     350             :         }
     351             : 
     352             :         /*
     353             :          * Take the minimum block count based approach where total block count
     354             :          * of raid bdev is the number of base bdev times the minimum block count
     355             :          * of any base bdev.
     356             :          */
     357          19 :         SPDK_DEBUGLOG(bdev_raid0, "min blockcount %" PRIu64 ",  numbasedev %u, strip size shift %u\n",
     358             :                       min_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
     359             : 
     360          19 :         raid_bdev->bdev.blockcnt = base_bdev_data_size * raid_bdev->num_base_bdevs;
     361             : 
     362          19 :         if (raid_bdev->num_base_bdevs > 1) {
     363          19 :                 raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
     364          19 :                 raid_bdev->bdev.split_on_optimal_io_boundary = true;
     365             :         } else {
     366             :                 /* Do not need to split reads/writes on single bdev RAID modules. */
     367           0 :                 raid_bdev->bdev.optimal_io_boundary = 0;
     368           0 :                 raid_bdev->bdev.split_on_optimal_io_boundary = false;
     369             :         }
     370             : 
     371          19 :         return 0;
     372             : }
     373             : 
     374             : static void
     375           0 : raid0_resize(struct raid_bdev *raid_bdev)
     376             : {
     377             :         uint64_t blockcnt;
     378             :         int rc;
     379           0 :         uint64_t min_blockcnt = UINT64_MAX;
     380             :         struct raid_base_bdev_info *base_info;
     381             :         uint64_t base_bdev_data_size;
     382             : 
     383           0 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
     384           0 :                 struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
     385             : 
     386           0 :                 min_blockcnt = spdk_min(min_blockcnt, base_bdev->blockcnt - base_info->data_offset);
     387             :         }
     388             : 
     389           0 :         base_bdev_data_size = (min_blockcnt >> raid_bdev->strip_size_shift) << raid_bdev->strip_size_shift;
     390           0 :         blockcnt = base_bdev_data_size * raid_bdev->num_base_bdevs;
     391             : 
     392           0 :         if (blockcnt == raid_bdev->bdev.blockcnt) {
     393           0 :                 return;
     394             :         }
     395             : 
     396           0 :         rc = spdk_bdev_notify_blockcnt_change(&raid_bdev->bdev, blockcnt);
     397           0 :         if (rc != 0) {
     398           0 :                 SPDK_ERRLOG("Failed to notify blockcount change\n");
     399           0 :                 return;
     400             :         }
     401             : 
     402           0 :         SPDK_NOTICELOG("raid0 '%s': min blockcount was changed from %" PRIu64 " to %" PRIu64 "\n",
     403             :                        raid_bdev->bdev.name,
     404             :                        raid_bdev->bdev.blockcnt,
     405             :                        blockcnt);
     406             : 
     407           0 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
     408           0 :                 base_info->data_size = base_bdev_data_size;
     409             :         }
     410             : }
     411             : 
     412             : static struct raid_bdev_module g_raid0_module = {
     413             :         .level = RAID0,
     414             :         .base_bdevs_min = 1,
     415             :         .memory_domains_supported = true,
     416             :         .start = raid0_start,
     417             :         .submit_rw_request = raid0_submit_rw_request,
     418             :         .submit_null_payload_request = raid0_submit_null_payload_request,
     419             :         .resize = raid0_resize,
     420             : };
     421           1 : RAID_MODULE_REGISTER(&g_raid0_module)
     422             : 
     423           1 : SPDK_LOG_REGISTER_COMPONENT(bdev_raid0)

Generated by: LCOV version 1.15