LCOV - code coverage report
Current view: top level - lib/reduce - reduce.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 793 989 80.2 %
Date: 2024-11-19 04:11:26 Functions: 50 52 96.2 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2018 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/reduce.h"
      10             : #include "spdk/env.h"
      11             : #include "spdk/string.h"
      12             : #include "spdk/bit_array.h"
      13             : #include "spdk/util.h"
      14             : #include "spdk/log.h"
      15             : #include "spdk/memory.h"
      16             : 
      17             : #include "libpmem.h"
      18             : 
/* Always round up the size of the PM region to the nearest cacheline. */
#define REDUCE_PM_SIZE_ALIGNMENT        64

/* Offset into the backing device where the persistent memory file's path is stored. */
#define REDUCE_BACKING_DEV_PATH_OFFSET  4096

/* Sentinel value (all ones) marking an unused entry in the logical/chunk maps. */
#define REDUCE_EMPTY_MAP_ENTRY  -1ULL

/* Number of spdk_reduce_vol_request structures preallocated per volume. */
#define REDUCE_NUM_VOL_REQUESTS 256
      28             : 
/* Structure written to offset 0 of both the pm file and the backing device. */
struct spdk_reduce_vol_superblock {
        uint8_t                         signature[8];   /* Must match SPDK_REDUCE_SIGNATURE. */
        struct spdk_reduce_vol_params   params;
        uint8_t                         reserved[4048]; /* Pads the superblock to exactly 4096 bytes. */
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_reduce_vol_superblock) == 4096, "size incorrect");

#define SPDK_REDUCE_SIGNATURE "SPDKREDU"
/* null terminator counts one */
SPDK_STATIC_ASSERT(sizeof(SPDK_REDUCE_SIGNATURE) - 1 ==
                   SPDK_SIZEOF_MEMBER(struct spdk_reduce_vol_superblock, signature), "size incorrect");

/* Maximum length in bytes of a pm file path, including the terminator. */
#define REDUCE_PATH_MAX 4096

/* Size (1 MiB) of the global zero buffer shared by all volumes (see _alloc_zero_buff). */
#define REDUCE_ZERO_BUF_SIZE 0x100000
      45             : 
/**
 * Describes a persistent memory file used to hold metadata associated with a
 *  compressed volume.
 */
struct spdk_reduce_pm_file {
        char                    path[REDUCE_PATH_MAX]; /* Filesystem path of the pm file. */
        void                    *pm_buf;               /* Mapped contents of the pm file (unmapped via pmem_unmap). */
        int                     pm_is_pmem;            /* Nonzero if the mapping is real pmem; selects persist path in _reduce_persist. */
        uint64_t                size;                  /* Size of the mapping in bytes. */
};
      56             : 
/* Request types stored in spdk_reduce_vol_request::type. */
#define REDUCE_IO_READV         1
#define REDUCE_IO_WRITEV        2

/* Per-chunk metadata persisted in the pm file; the trailing array holds one
 * backing io unit index per io unit of the chunk (length is
 * backing_io_units_per_chunk - see _reduce_vol_get_chunk_struct_size).
 */
struct spdk_reduce_chunk_map {
        uint32_t                compressed_size; /* Size of the chunk's compressed payload in bytes. */
        uint32_t                reserved;        /* Padding; keeps io_unit_index 8-byte aligned. */
        uint64_t                io_unit_index[0];
};
      65             : 
/* State for one in-flight read or write request against a reduce volume. */
struct spdk_reduce_vol_request {
        /**
         *  Scratch buffer used for uncompressed chunk.  This is used for:
         *   1) source buffer for compression operations
         *   2) destination buffer for decompression operations
         *   3) data buffer when writing uncompressed chunk to disk
         *   4) data buffer when reading uncompressed chunk from disk
         */
        uint8_t                                 *decomp_buf;
        struct iovec                            *decomp_buf_iov;

        /**
         * These are used to construct the iovecs that are sent to
         *  the decomp engine, they point to a mix of the scratch buffer
         *  and user buffer
         */
        struct iovec                            decomp_iov[REDUCE_MAX_IOVECS + 2];
        int                                     decomp_iovcnt;

        /**
         *  Scratch buffer used for compressed chunk.  This is used for:
         *   1) destination buffer for compression operations
         *   2) source buffer for decompression operations
         *   3) data buffer when writing compressed chunk to disk
         *   4) data buffer when reading compressed chunk from disk
         */
        uint8_t                                 *comp_buf;
        struct iovec                            *comp_buf_iov;
        /* Caller-supplied iovec array and its element count. */
        struct iovec                            *iov;
        /* NOTE(review): presumably flags a read-modify-write path - confirm against users. */
        bool                                    rmw;
        struct spdk_reduce_vol                  *vol;
        /* One of the REDUCE_IO_* values above. */
        int                                     type;
        int                                     reduce_errno;
        int                                     iovcnt;
        /* Count of outstanding backing-device operations for this request. */
        int                                     num_backing_ops;
        uint32_t                                num_io_units;
        bool                                    chunk_is_compressed;
        bool                                    copy_after_decompress;
        uint64_t                                offset;
        uint64_t                                logical_map_index;
        uint64_t                                length;
        uint64_t                                chunk_map_index;
        struct spdk_reduce_chunk_map            *chunk;
        /* Completion callback + argument invoked when the request finishes. */
        spdk_reduce_vol_op_complete             cb_fn;
        void                                    *cb_arg;
        TAILQ_ENTRY(spdk_reduce_vol_request)    tailq;
        struct spdk_reduce_vol_cb_args          backing_cb_args;
};
     114             : 
/* In-memory state for one open reduce (compressed) volume. */
struct spdk_reduce_vol {
        struct spdk_reduce_vol_params           params;
        uint32_t                                backing_io_units_per_chunk;
        uint32_t                                backing_lba_per_io_unit;
        uint32_t                                logical_blocks_per_chunk;
        struct spdk_reduce_pm_file              pm_file;
        struct spdk_reduce_backing_dev          *backing_dev;
        struct spdk_reduce_vol_superblock       *backing_super;
        /* Pointers into pm_file.pm_buf; laid out by _initialize_vol_pm_pointers:
         * superblock first, then the logical map, then the chunk maps. */
        struct spdk_reduce_vol_superblock       *pm_super;
        uint64_t                                *pm_logical_map;
        uint64_t                                *pm_chunk_maps;

        /* Allocation bitmaps for chunk map slots and backing io units
         * (created in _allocate_bit_arrays). */
        struct spdk_bit_array                   *allocated_chunk_maps;
        struct spdk_bit_array                   *allocated_backing_io_units;

        /* Preallocated request pool and the lists tracking request state. */
        struct spdk_reduce_vol_request          *request_mem;
        TAILQ_HEAD(, spdk_reduce_vol_request)   free_requests;
        TAILQ_HEAD(, spdk_reduce_vol_request)   executing_requests;
        TAILQ_HEAD(, spdk_reduce_vol_request)   queued_requests;

        /* Single contiguous buffer used for all request buffers for this volume. */
        uint8_t                                 *buf_mem;
        struct iovec                            *buf_iov_mem;
};
     139             : 
static void _start_readv_request(struct spdk_reduce_vol_request *req);
static void _start_writev_request(struct spdk_reduce_vol_request *req);
/* Zero buffer shared by every volume; allocated lazily in _alloc_zero_buff(). */
static uint8_t *g_zero_buf;
/* Count of volumes referencing g_zero_buf (incremented in _alloc_zero_buff). */
static int g_vol_count = 0;

/*
 * Allocate extra metadata chunks and corresponding backing io units to account for
 *  outstanding IO in worst case scenario where logical map is completely allocated
 *  and no data can be compressed.  We need extra chunks in this case to handle
 *  in-flight writes since reduce never writes data in place.
 */
#define REDUCE_NUM_EXTRA_CHUNKS 128
     152             : 
     153             : static void
     154          40 : _reduce_persist(struct spdk_reduce_vol *vol, const void *addr, size_t len)
     155             : {
     156          40 :         if (vol->pm_file.pm_is_pmem) {
     157          40 :                 pmem_persist(addr, len);
     158          40 :         } else {
     159           0 :                 pmem_msync(addr, len);
     160             :         }
     161          40 : }
     162             : 
     163             : static uint64_t
     164          47 : _get_pm_logical_map_size(uint64_t vol_size, uint64_t chunk_size)
     165             : {
     166          47 :         uint64_t chunks_in_logical_map, logical_map_size;
     167             : 
     168          47 :         chunks_in_logical_map = vol_size / chunk_size;
     169          47 :         logical_map_size = chunks_in_logical_map * sizeof(uint64_t);
     170             : 
     171             :         /* Round up to next cacheline. */
     172          94 :         return spdk_divide_round_up(logical_map_size, REDUCE_PM_SIZE_ALIGNMENT) *
     173             :                REDUCE_PM_SIZE_ALIGNMENT;
     174          47 : }
     175             : 
     176             : static uint64_t
     177         336 : _get_total_chunks(uint64_t vol_size, uint64_t chunk_size)
     178             : {
     179         336 :         uint64_t num_chunks;
     180             : 
     181         336 :         num_chunks = vol_size / chunk_size;
     182         336 :         num_chunks += REDUCE_NUM_EXTRA_CHUNKS;
     183             : 
     184         672 :         return num_chunks;
     185         336 : }
     186             : 
     187             : static inline uint32_t
     188         326 : _reduce_vol_get_chunk_struct_size(uint64_t backing_io_units_per_chunk)
     189             : {
     190         326 :         return sizeof(struct spdk_reduce_chunk_map) + sizeof(uint64_t) * backing_io_units_per_chunk;
     191             : }
     192             : 
     193             : static uint64_t
     194          24 : _get_pm_total_chunks_size(uint64_t vol_size, uint64_t chunk_size, uint64_t backing_io_unit_size)
     195             : {
     196          24 :         uint64_t io_units_per_chunk, num_chunks, total_chunks_size;
     197             : 
     198          24 :         num_chunks = _get_total_chunks(vol_size, chunk_size);
     199          24 :         io_units_per_chunk = chunk_size / backing_io_unit_size;
     200             : 
     201          24 :         total_chunks_size = num_chunks * _reduce_vol_get_chunk_struct_size(io_units_per_chunk);
     202             : 
     203          48 :         return spdk_divide_round_up(total_chunks_size, REDUCE_PM_SIZE_ALIGNMENT) *
     204             :                REDUCE_PM_SIZE_ALIGNMENT;
     205          24 : }
     206             : 
     207             : static struct spdk_reduce_chunk_map *
     208         289 : _reduce_vol_get_chunk_map(struct spdk_reduce_vol *vol, uint64_t chunk_map_index)
     209             : {
     210         289 :         uintptr_t chunk_map_addr;
     211             : 
     212         289 :         assert(chunk_map_index < _get_total_chunks(vol->params.vol_size, vol->params.chunk_size));
     213             : 
     214         289 :         chunk_map_addr = (uintptr_t)vol->pm_chunk_maps;
     215         578 :         chunk_map_addr += chunk_map_index *
     216         289 :                           _reduce_vol_get_chunk_struct_size(vol->backing_io_units_per_chunk);
     217             : 
     218         578 :         return (struct spdk_reduce_chunk_map *)chunk_map_addr;
     219         289 : }
     220             : 
     221             : static int
     222          20 : _validate_vol_params(struct spdk_reduce_vol_params *params)
     223             : {
     224          20 :         if (params->vol_size > 0) {
     225             :                 /**
     226             :                  * User does not pass in the vol size - it gets calculated by libreduce from
     227             :                  *  values in this structure plus the size of the backing device.
     228             :                  */
     229           0 :                 return -EINVAL;
     230             :         }
     231             : 
     232          40 :         if (params->chunk_size == 0 || params->backing_io_unit_size == 0 ||
     233          20 :             params->logical_block_size == 0) {
     234           0 :                 return -EINVAL;
     235             :         }
     236             : 
     237             :         /* Chunk size must be an even multiple of the backing io unit size. */
     238          20 :         if ((params->chunk_size % params->backing_io_unit_size) != 0) {
     239           0 :                 return -EINVAL;
     240             :         }
     241             : 
     242             :         /* Chunk size must be an even multiple of the logical block size. */
     243          20 :         if ((params->chunk_size % params->logical_block_size) != 0) {
     244           0 :                 return -1;
     245             :         }
     246             : 
     247          20 :         return 0;
     248          20 : }
     249             : 
     250             : static uint64_t
     251          26 : _get_vol_size(uint64_t chunk_size, uint64_t backing_dev_size)
     252             : {
     253          26 :         uint64_t num_chunks;
     254             : 
     255          26 :         num_chunks = backing_dev_size / chunk_size;
     256          26 :         if (num_chunks <= REDUCE_NUM_EXTRA_CHUNKS) {
     257           1 :                 return 0;
     258             :         }
     259             : 
     260          25 :         num_chunks -= REDUCE_NUM_EXTRA_CHUNKS;
     261          25 :         return num_chunks * chunk_size;
     262          26 : }
     263             : 
     264             : static uint64_t
     265          24 : _get_pm_file_size(struct spdk_reduce_vol_params *params)
     266             : {
     267          24 :         uint64_t total_pm_size;
     268             : 
     269          24 :         total_pm_size = sizeof(struct spdk_reduce_vol_superblock);
     270          24 :         total_pm_size += _get_pm_logical_map_size(params->vol_size, params->chunk_size);
     271          48 :         total_pm_size += _get_pm_total_chunks_size(params->vol_size, params->chunk_size,
     272          24 :                          params->backing_io_unit_size);
     273          48 :         return total_pm_size;
     274          24 : }
     275             : 
/* Public accessor: return the volume's UUID (stored in its parameters). */
const struct spdk_uuid *
spdk_reduce_vol_get_uuid(struct spdk_reduce_vol *vol)
{
        return &vol->params.uuid;
}
     281             : 
     282             : static void
     283          23 : _initialize_vol_pm_pointers(struct spdk_reduce_vol *vol)
     284             : {
     285          23 :         uint64_t logical_map_size;
     286             : 
     287             :         /* Superblock is at the beginning of the pm file. */
     288          23 :         vol->pm_super = (struct spdk_reduce_vol_superblock *)vol->pm_file.pm_buf;
     289             : 
     290             :         /* Logical map immediately follows the super block. */
     291          23 :         vol->pm_logical_map = (uint64_t *)(vol->pm_super + 1);
     292             : 
     293             :         /* Chunks maps follow the logical map. */
     294          23 :         logical_map_size = _get_pm_logical_map_size(vol->params.vol_size, vol->params.chunk_size);
     295          23 :         vol->pm_chunk_maps = (uint64_t *)((uint8_t *)vol->pm_logical_map + logical_map_size);
     296          23 : }
     297             : 
/* We need 2 iovs during load - one for the superblock, another for the path */
#define LOAD_IOV_COUNT  2

/* Transient context shared by the init and load state machines; freed by
 * _init_load_cleanup once the vol is handed to the application or the
 * operation fails.
 */
struct reduce_init_load_ctx {
        struct spdk_reduce_vol                  *vol;
        struct spdk_reduce_vol_cb_args          backing_cb_args;
        /* Application completion callback + argument. */
        spdk_reduce_vol_op_with_handle_complete cb_fn;
        void                                    *cb_arg;
        struct iovec                            iov[LOAD_IOV_COUNT];
        /* Buffer holding the pm file path written to the backing device. */
        void                                    *path;
};
     309             : 
     310             : static inline bool
     311       13830 : _addr_crosses_huge_page(const void *addr, size_t *size)
     312             : {
     313       13830 :         size_t _size;
     314       13830 :         uint64_t rc;
     315             : 
     316       13830 :         assert(size);
     317             : 
     318       13830 :         _size = *size;
     319       13830 :         rc = spdk_vtophys(addr, size);
     320             : 
     321       13830 :         return rc == SPDK_VTOPHYS_ERROR || _size != *size;
     322       13830 : }
     323             : 
     324             : static inline int
     325       13824 : _set_buffer(uint8_t **vol_buffer, uint8_t **_addr, uint8_t *addr_range, size_t buffer_size)
     326             : {
     327       13824 :         uint8_t *addr;
     328       13824 :         size_t size_tmp = buffer_size;
     329             : 
     330       13824 :         addr = *_addr;
     331             : 
     332             :         /* Verify that addr + buffer_size doesn't cross huge page boundary */
     333       13824 :         if (_addr_crosses_huge_page(addr, &size_tmp)) {
     334             :                 /* Memory start is aligned on 2MiB, so buffer should be located at the end of the page.
     335             :                  * Skip remaining bytes and continue from the beginning of the next page */
     336           6 :                 addr += size_tmp;
     337           6 :         }
     338             : 
     339       13824 :         if (addr + buffer_size > addr_range) {
     340           0 :                 SPDK_ERRLOG("Vol buffer %p out of range %p\n", addr, addr_range);
     341           0 :                 return -ERANGE;
     342             :         }
     343             : 
     344       13824 :         *vol_buffer = addr;
     345       13824 :         *_addr = addr + buffer_size;
     346             : 
     347       13824 :         return 0;
     348       13824 : }
     349             : 
     350             : static int
     351          27 : _allocate_vol_requests(struct spdk_reduce_vol *vol)
     352             : {
     353          27 :         struct spdk_reduce_vol_request *req;
     354          27 :         uint32_t reqs_in_2mb_page, huge_pages_needed;
     355          27 :         uint8_t *buffer, *buffer_end;
     356          27 :         int i = 0;
     357          27 :         int rc = 0;
     358             : 
     359             :         /* It is needed to allocate comp and decomp buffers so that they do not cross physical
     360             :         * page boundaries. Assume that the system uses default 2MiB pages and chunk_size is not
     361             :         * necessarily power of 2
     362             :         * Allocate 2x since we need buffers for both read/write and compress/decompress
     363             :         * intermediate buffers. */
     364          27 :         reqs_in_2mb_page = VALUE_2MB / (vol->params.chunk_size * 2);
     365          27 :         if (!reqs_in_2mb_page) {
     366           0 :                 return -EINVAL;
     367             :         }
     368          27 :         huge_pages_needed = SPDK_CEIL_DIV(REDUCE_NUM_VOL_REQUESTS, reqs_in_2mb_page);
     369             : 
     370          27 :         vol->buf_mem = spdk_dma_malloc(VALUE_2MB * huge_pages_needed, VALUE_2MB, NULL);
     371          27 :         if (vol->buf_mem == NULL) {
     372           0 :                 return -ENOMEM;
     373             :         }
     374             : 
     375          27 :         vol->request_mem = calloc(REDUCE_NUM_VOL_REQUESTS, sizeof(*req));
     376          27 :         if (vol->request_mem == NULL) {
     377           0 :                 spdk_free(vol->buf_mem);
     378           0 :                 vol->buf_mem = NULL;
     379           0 :                 return -ENOMEM;
     380             :         }
     381             : 
     382             :         /* Allocate 2x since we need iovs for both read/write and compress/decompress intermediate
     383             :          *  buffers.
     384             :          */
     385          27 :         vol->buf_iov_mem = calloc(REDUCE_NUM_VOL_REQUESTS,
     386          27 :                                   2 * sizeof(struct iovec) * vol->backing_io_units_per_chunk);
     387          27 :         if (vol->buf_iov_mem == NULL) {
     388           0 :                 free(vol->request_mem);
     389           0 :                 spdk_free(vol->buf_mem);
     390           0 :                 vol->request_mem = NULL;
     391           0 :                 vol->buf_mem = NULL;
     392           0 :                 return -ENOMEM;
     393             :         }
     394             : 
     395          27 :         buffer = vol->buf_mem;
     396          27 :         buffer_end = buffer + VALUE_2MB * huge_pages_needed;
     397             : 
     398        6939 :         for (i = 0; i < REDUCE_NUM_VOL_REQUESTS; i++) {
     399        6912 :                 req = &vol->request_mem[i];
     400        6912 :                 TAILQ_INSERT_HEAD(&vol->free_requests, req, tailq);
     401        6912 :                 req->decomp_buf_iov = &vol->buf_iov_mem[(2 * i) * vol->backing_io_units_per_chunk];
     402        6912 :                 req->comp_buf_iov = &vol->buf_iov_mem[(2 * i + 1) * vol->backing_io_units_per_chunk];
     403             : 
     404        6912 :                 rc = _set_buffer(&req->comp_buf, &buffer, buffer_end, vol->params.chunk_size);
     405        6912 :                 if (rc) {
     406           0 :                         SPDK_ERRLOG("Failed to set comp buffer for req idx %u, addr %p, start %p, end %p\n", i, buffer,
     407             :                                     vol->buf_mem, buffer_end);
     408           0 :                         break;
     409             :                 }
     410        6912 :                 rc = _set_buffer(&req->decomp_buf, &buffer, buffer_end, vol->params.chunk_size);
     411        6912 :                 if (rc) {
     412           0 :                         SPDK_ERRLOG("Failed to set decomp buffer for req idx %u, addr %p, start %p, end %p\n", i, buffer,
     413             :                                     vol->buf_mem, buffer_end);
     414           0 :                         break;
     415             :                 }
     416        6912 :         }
     417             : 
     418          27 :         if (rc) {
     419           0 :                 free(vol->buf_iov_mem);
     420           0 :                 free(vol->request_mem);
     421           0 :                 spdk_free(vol->buf_mem);
     422           0 :                 vol->buf_mem = NULL;
     423           0 :                 vol->buf_iov_mem = NULL;
     424           0 :                 vol->request_mem = NULL;
     425           0 :         }
     426             : 
     427          27 :         return rc;
     428          27 : }
     429             : 
     430             : static void
     431          53 : _init_load_cleanup(struct spdk_reduce_vol *vol, struct reduce_init_load_ctx *ctx)
     432             : {
     433          53 :         if (ctx != NULL) {
     434          25 :                 spdk_free(ctx->path);
     435          25 :                 free(ctx);
     436          25 :         }
     437             : 
     438          53 :         if (vol != NULL) {
     439          29 :                 if (vol->pm_file.pm_buf != NULL) {
     440          23 :                         pmem_unmap(vol->pm_file.pm_buf, vol->pm_file.size);
     441          23 :                 }
     442             : 
     443          29 :                 spdk_free(vol->backing_super);
     444          29 :                 spdk_bit_array_free(&vol->allocated_chunk_maps);
     445          29 :                 spdk_bit_array_free(&vol->allocated_backing_io_units);
     446          29 :                 free(vol->request_mem);
     447          29 :                 free(vol->buf_iov_mem);
     448          29 :                 spdk_free(vol->buf_mem);
     449          29 :                 free(vol);
     450          29 :         }
     451          53 : }
     452             : 
     453             : static int
     454          25 : _alloc_zero_buff(void)
     455             : {
     456          25 :         int rc = 0;
     457             : 
     458             :         /* The zero buffer is shared between all volumes and just used
     459             :          * for reads so allocate one global instance here if not already
     460             :          * allocated when another vol init'd or loaded.
     461             :          */
     462          25 :         if (g_vol_count++ == 0) {
     463          23 :                 g_zero_buf = spdk_zmalloc(REDUCE_ZERO_BUF_SIZE,
     464             :                                           64, NULL, SPDK_ENV_LCORE_ID_ANY,
     465             :                                           SPDK_MALLOC_DMA);
     466          23 :                 if (g_zero_buf == NULL) {
     467           0 :                         rc = -ENOMEM;
     468           0 :                 }
     469          23 :         }
     470          50 :         return rc;
     471          25 : }
     472             : 
     473             : static void
     474          14 : _init_write_super_cpl(void *cb_arg, int reduce_errno)
     475             : {
     476          14 :         struct reduce_init_load_ctx *init_ctx = cb_arg;
     477          14 :         int rc;
     478             : 
     479          14 :         rc = _allocate_vol_requests(init_ctx->vol);
     480          14 :         if (rc != 0) {
     481           0 :                 init_ctx->cb_fn(init_ctx->cb_arg, NULL, rc);
     482           0 :                 _init_load_cleanup(init_ctx->vol, init_ctx);
     483           0 :                 return;
     484             :         }
     485             : 
     486          14 :         rc = _alloc_zero_buff();
     487          14 :         if (rc != 0) {
     488           0 :                 init_ctx->cb_fn(init_ctx->cb_arg, NULL, rc);
     489           0 :                 _init_load_cleanup(init_ctx->vol, init_ctx);
     490           0 :                 return;
     491             :         }
     492             : 
     493          14 :         init_ctx->cb_fn(init_ctx->cb_arg, init_ctx->vol, reduce_errno);
     494             :         /* Only clean up the ctx - the vol has been passed to the application
     495             :          *  for use now that initialization was successful.
     496             :          */
     497          14 :         _init_load_cleanup(NULL, init_ctx);
     498          14 : }
     499             : 
/*
 * Completion callback for writing the pm file path to the backing device
 * during init.  Chains the next step: writing the superblock at offset 0,
 * completed by _init_write_super_cpl.
 *
 * NOTE(review): reduce_errno from the path write is not checked before the
 * superblock write is issued - confirm whether a failed path write should
 * abort the init sequence here.
 */
static void
_init_write_path_cpl(void *cb_arg, int reduce_errno)
{
        struct reduce_init_load_ctx *init_ctx = cb_arg;
        struct spdk_reduce_vol *vol = init_ctx->vol;

        init_ctx->iov[0].iov_base = vol->backing_super;
        init_ctx->iov[0].iov_len = sizeof(*vol->backing_super);
        init_ctx->backing_cb_args.cb_fn = _init_write_super_cpl;
        init_ctx->backing_cb_args.cb_arg = init_ctx;
        vol->backing_dev->writev(vol->backing_dev, init_ctx->iov, 1,
                                 0, sizeof(*vol->backing_super) / vol->backing_dev->blocklen,
                                 &init_ctx->backing_cb_args);
}
     514             : 
     515             : static int
     516          23 : _allocate_bit_arrays(struct spdk_reduce_vol *vol)
     517             : {
     518          23 :         uint64_t total_chunks, total_backing_io_units;
     519          23 :         uint32_t i, num_metadata_io_units;
     520             : 
     521          23 :         total_chunks = _get_total_chunks(vol->params.vol_size, vol->params.chunk_size);
     522          23 :         vol->allocated_chunk_maps = spdk_bit_array_create(total_chunks);
     523          23 :         total_backing_io_units = total_chunks * (vol->params.chunk_size / vol->params.backing_io_unit_size);
     524          23 :         vol->allocated_backing_io_units = spdk_bit_array_create(total_backing_io_units);
     525             : 
     526          23 :         if (vol->allocated_chunk_maps == NULL || vol->allocated_backing_io_units == NULL) {
     527           0 :                 return -ENOMEM;
     528             :         }
     529             : 
     530             :         /* Set backing io unit bits associated with metadata. */
     531          23 :         num_metadata_io_units = (sizeof(*vol->backing_super) + REDUCE_PATH_MAX) /
     532          23 :                                 vol->backing_dev->blocklen;
     533         265 :         for (i = 0; i < num_metadata_io_units; i++) {
     534         242 :                 spdk_bit_array_set(vol->allocated_backing_io_units, i);
     535         242 :         }
     536             : 
     537          23 :         return 0;
     538          23 : }
     539             : 
     540             : void
     541          16 : spdk_reduce_vol_init(struct spdk_reduce_vol_params *params,
     542             :                      struct spdk_reduce_backing_dev *backing_dev,
     543             :                      const char *pm_file_dir,
     544             :                      spdk_reduce_vol_op_with_handle_complete cb_fn, void *cb_arg)
     545             : {
     546          16 :         struct spdk_reduce_vol *vol;
     547          16 :         struct reduce_init_load_ctx *init_ctx;
     548          16 :         uint64_t backing_dev_size;
     549          16 :         size_t mapped_len;
     550          16 :         int dir_len, max_dir_len, rc;
     551             : 
     552             :         /* We need to append a path separator and the UUID to the supplied
     553             :          * path.
     554             :          */
     555          16 :         max_dir_len = REDUCE_PATH_MAX - SPDK_UUID_STRING_LEN - 1;
     556          16 :         dir_len = strnlen(pm_file_dir, max_dir_len);
     557             :         /* Strip trailing slash if the user provided one - we will add it back
     558             :          * later when appending the filename.
     559             :          */
     560          16 :         if (pm_file_dir[dir_len - 1] == '/') {
     561           0 :                 dir_len--;
     562           0 :         }
     563          16 :         if (dir_len == max_dir_len) {
     564           0 :                 SPDK_ERRLOG("pm_file_dir (%s) too long\n", pm_file_dir);
     565           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
     566           0 :                 return;
     567             :         }
     568             : 
     569          16 :         rc = _validate_vol_params(params);
     570          16 :         if (rc != 0) {
     571           0 :                 SPDK_ERRLOG("invalid vol params\n");
     572           0 :                 cb_fn(cb_arg, NULL, rc);
     573           0 :                 return;
     574             :         }
     575             : 
     576          16 :         backing_dev_size = backing_dev->blockcnt * backing_dev->blocklen;
     577          16 :         params->vol_size = _get_vol_size(params->chunk_size, backing_dev_size);
     578          16 :         if (params->vol_size == 0) {
     579           1 :                 SPDK_ERRLOG("backing device is too small\n");
     580           1 :                 cb_fn(cb_arg, NULL, -EINVAL);
     581           1 :                 return;
     582             :         }
     583             : 
     584          29 :         if (backing_dev->readv == NULL || backing_dev->writev == NULL ||
     585          14 :             backing_dev->unmap == NULL) {
     586           1 :                 SPDK_ERRLOG("backing_dev function pointer not specified\n");
     587           1 :                 cb_fn(cb_arg, NULL, -EINVAL);
     588           1 :                 return;
     589             :         }
     590             : 
     591          14 :         vol = calloc(1, sizeof(*vol));
     592          14 :         if (vol == NULL) {
     593           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
     594           0 :                 return;
     595             :         }
     596             : 
     597          14 :         TAILQ_INIT(&vol->free_requests);
     598          14 :         TAILQ_INIT(&vol->executing_requests);
     599          14 :         TAILQ_INIT(&vol->queued_requests);
     600             : 
     601          14 :         vol->backing_super = spdk_zmalloc(sizeof(*vol->backing_super), 0, NULL,
     602             :                                           SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
     603          14 :         if (vol->backing_super == NULL) {
     604           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
     605           0 :                 _init_load_cleanup(vol, NULL);
     606           0 :                 return;
     607             :         }
     608             : 
     609          14 :         init_ctx = calloc(1, sizeof(*init_ctx));
     610          14 :         if (init_ctx == NULL) {
     611           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
     612           0 :                 _init_load_cleanup(vol, NULL);
     613           0 :                 return;
     614             :         }
     615             : 
     616          14 :         init_ctx->path = spdk_zmalloc(REDUCE_PATH_MAX, 0, NULL,
     617             :                                       SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
     618          14 :         if (init_ctx->path == NULL) {
     619           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
     620           0 :                 _init_load_cleanup(vol, init_ctx);
     621           0 :                 return;
     622             :         }
     623             : 
     624          14 :         if (spdk_uuid_is_null(&params->uuid)) {
     625           1 :                 spdk_uuid_generate(&params->uuid);
     626           1 :         }
     627             : 
     628          14 :         memcpy(vol->pm_file.path, pm_file_dir, dir_len);
     629          14 :         vol->pm_file.path[dir_len] = '/';
     630          28 :         spdk_uuid_fmt_lower(&vol->pm_file.path[dir_len + 1], SPDK_UUID_STRING_LEN,
     631          14 :                             &params->uuid);
     632          14 :         vol->pm_file.size = _get_pm_file_size(params);
     633          28 :         vol->pm_file.pm_buf = pmem_map_file(vol->pm_file.path, vol->pm_file.size,
     634          14 :                                             PMEM_FILE_CREATE | PMEM_FILE_EXCL, 0600,
     635          14 :                                             &mapped_len, &vol->pm_file.pm_is_pmem);
     636          14 :         if (vol->pm_file.pm_buf == NULL) {
     637           0 :                 SPDK_ERRLOG("could not pmem_map_file(%s): %s\n",
     638             :                             vol->pm_file.path, strerror(errno));
     639           0 :                 cb_fn(cb_arg, NULL, -errno);
     640           0 :                 _init_load_cleanup(vol, init_ctx);
     641           0 :                 return;
     642             :         }
     643             : 
     644          14 :         if (vol->pm_file.size != mapped_len) {
     645           0 :                 SPDK_ERRLOG("could not map entire pmem file (size=%" PRIu64 " mapped=%" PRIu64 ")\n",
     646             :                             vol->pm_file.size, mapped_len);
     647           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
     648           0 :                 _init_load_cleanup(vol, init_ctx);
     649           0 :                 return;
     650             :         }
     651             : 
     652          14 :         vol->backing_io_units_per_chunk = params->chunk_size / params->backing_io_unit_size;
     653          14 :         vol->logical_blocks_per_chunk = params->chunk_size / params->logical_block_size;
     654          14 :         vol->backing_lba_per_io_unit = params->backing_io_unit_size / backing_dev->blocklen;
     655          14 :         memcpy(&vol->params, params, sizeof(*params));
     656             : 
     657          14 :         vol->backing_dev = backing_dev;
     658             : 
     659          14 :         rc = _allocate_bit_arrays(vol);
     660          14 :         if (rc != 0) {
     661           0 :                 cb_fn(cb_arg, NULL, rc);
     662           0 :                 _init_load_cleanup(vol, init_ctx);
     663           0 :                 return;
     664             :         }
     665             : 
     666          14 :         memcpy(vol->backing_super->signature, SPDK_REDUCE_SIGNATURE,
     667             :                sizeof(vol->backing_super->signature));
     668          14 :         memcpy(&vol->backing_super->params, params, sizeof(*params));
     669             : 
     670          14 :         _initialize_vol_pm_pointers(vol);
     671             : 
     672          14 :         memcpy(vol->pm_super, vol->backing_super, sizeof(*vol->backing_super));
     673             :         /* Writing 0xFF's is equivalent of filling it all with SPDK_EMPTY_MAP_ENTRY.
     674             :          * Note that this writes 0xFF to not just the logical map but the chunk maps as well.
     675             :          */
     676          14 :         memset(vol->pm_logical_map, 0xFF, vol->pm_file.size - sizeof(*vol->backing_super));
     677          14 :         _reduce_persist(vol, vol->pm_file.pm_buf, vol->pm_file.size);
     678             : 
     679          14 :         init_ctx->vol = vol;
     680          14 :         init_ctx->cb_fn = cb_fn;
     681          14 :         init_ctx->cb_arg = cb_arg;
     682             : 
     683          14 :         memcpy(init_ctx->path, vol->pm_file.path, REDUCE_PATH_MAX);
     684          14 :         init_ctx->iov[0].iov_base = init_ctx->path;
     685          14 :         init_ctx->iov[0].iov_len = REDUCE_PATH_MAX;
     686          14 :         init_ctx->backing_cb_args.cb_fn = _init_write_path_cpl;
     687          14 :         init_ctx->backing_cb_args.cb_arg = init_ctx;
     688             :         /* Write path to offset 4K on backing device - just after where the super
     689             :          *  block will be written.  We wait until this is committed before writing the
     690             :          *  super block to guarantee we don't get the super block written without the
     691             :          *  the path if the system crashed in the middle of a write operation.
     692             :          */
     693          28 :         vol->backing_dev->writev(vol->backing_dev, init_ctx->iov, 1,
     694          14 :                                  REDUCE_BACKING_DEV_PATH_OFFSET / vol->backing_dev->blocklen,
     695          14 :                                  REDUCE_PATH_MAX / vol->backing_dev->blocklen,
     696          14 :                                  &init_ctx->backing_cb_args);
     697          16 : }
     698             : 
     699             : static void destroy_load_cb(void *cb_arg, struct spdk_reduce_vol *vol, int reduce_errno);
     700             : 
/* Completion callback for the combined superblock + pm-file-path read issued
 * by spdk_reduce_vol_load().  Validates the superblock, rebuilds the in-memory
 * volume state from the persistent memory file, and reconstructs the chunk-map
 * and backing-io-unit allocation bitmaps by walking the logical map.
 *
 * On any failure, invokes the load callback with NULL and a negative errno and
 * tears down both the volume and the load context.
 */
static void
_load_read_super_and_path_cpl(void *cb_arg, int reduce_errno)
{
	struct reduce_init_load_ctx *load_ctx = cb_arg;
	struct spdk_reduce_vol *vol = load_ctx->vol;
	uint64_t backing_dev_size;
	uint64_t i, num_chunks, logical_map_index;
	struct spdk_reduce_chunk_map *chunk;
	size_t mapped_len;
	uint32_t j;
	int rc;

	rc = _alloc_zero_buff();
	if (rc) {
		goto error;
	}

	/* The signature at offset 0 is the only marker that this backing
	 * device was ever formatted by libreduce.
	 */
	if (memcmp(vol->backing_super->signature,
		   SPDK_REDUCE_SIGNATURE,
		   sizeof(vol->backing_super->signature)) != 0) {
		/* This backing device isn't a libreduce backing device. */
		rc = -EILSEQ;
		goto error;
	}

	/* If the cb_fn is destroy_load_cb, it means we are wanting to destroy this compress bdev.
	 *  So don't bother getting the volume ready to use - invoke the callback immediately
	 *  so destroy_load_cb can delete the metadata off of the block device and delete the
	 *  persistent memory file if it exists.
	 */
	memcpy(vol->pm_file.path, load_ctx->path, sizeof(vol->pm_file.path));
	if (load_ctx->cb_fn == (*destroy_load_cb)) {
		load_ctx->cb_fn(load_ctx->cb_arg, vol, 0);
		_init_load_cleanup(NULL, load_ctx);
		return;
	}

	/* Adopt the parameters stored in the on-disk superblock and derive the
	 * per-chunk unit counts from them.
	 */
	memcpy(&vol->params, &vol->backing_super->params, sizeof(vol->params));
	vol->backing_io_units_per_chunk = vol->params.chunk_size / vol->params.backing_io_unit_size;
	vol->logical_blocks_per_chunk = vol->params.chunk_size / vol->params.logical_block_size;
	vol->backing_lba_per_io_unit = vol->params.backing_io_unit_size / vol->backing_dev->blocklen;

	rc = _allocate_bit_arrays(vol);
	if (rc != 0) {
		goto error;
	}

	/* The backing device may have grown but must not have shrunk below the
	 * size recorded when the volume was created.
	 */
	backing_dev_size = vol->backing_dev->blockcnt * vol->backing_dev->blocklen;
	if (_get_vol_size(vol->params.chunk_size, backing_dev_size) < vol->params.vol_size) {
		SPDK_ERRLOG("backing device size %" PRIi64 " smaller than expected\n",
			    backing_dev_size);
		rc = -EILSEQ;
		goto error;
	}

	/* Re-map the existing pm file (size 0 / no CREATE flags = open as-is). */
	vol->pm_file.size = _get_pm_file_size(&vol->params);
	vol->pm_file.pm_buf = pmem_map_file(vol->pm_file.path, 0, 0, 0, &mapped_len,
					    &vol->pm_file.pm_is_pmem);
	if (vol->pm_file.pm_buf == NULL) {
		SPDK_ERRLOG("could not pmem_map_file(%s): %s\n", vol->pm_file.path, strerror(errno));
		rc = -errno;
		goto error;
	}

	if (vol->pm_file.size != mapped_len) {
		SPDK_ERRLOG("could not map entire pmem file (size=%" PRIu64 " mapped=%" PRIu64 ")\n",
			    vol->pm_file.size, mapped_len);
		rc = -ENOMEM;
		goto error;
	}

	rc = _allocate_vol_requests(vol);
	if (rc != 0) {
		goto error;
	}

	_initialize_vol_pm_pointers(vol);

	/* Rebuild the allocation bitmaps: every chunk referenced by the
	 * logical map is in use, as is every io unit referenced by that
	 * chunk's map.
	 */
	num_chunks = vol->params.vol_size / vol->params.chunk_size;
	for (i = 0; i < num_chunks; i++) {
		logical_map_index = vol->pm_logical_map[i];
		if (logical_map_index == REDUCE_EMPTY_MAP_ENTRY) {
			continue;
		}
		spdk_bit_array_set(vol->allocated_chunk_maps, logical_map_index);
		chunk = _reduce_vol_get_chunk_map(vol, logical_map_index);
		for (j = 0; j < vol->backing_io_units_per_chunk; j++) {
			if (chunk->io_unit_index[j] != REDUCE_EMPTY_MAP_ENTRY) {
				spdk_bit_array_set(vol->allocated_backing_io_units, chunk->io_unit_index[j]);
			}
		}
	}

	load_ctx->cb_fn(load_ctx->cb_arg, vol, 0);
	/* Only clean up the ctx - the vol has been passed to the application
	 *  for use now that volume load was successful.
	 */
	_init_load_cleanup(NULL, load_ctx);
	return;

error:
	load_ctx->cb_fn(load_ctx->cb_arg, NULL, rc);
	_init_load_cleanup(vol, load_ctx);
}
     805             : 
     806             : void
     807          11 : spdk_reduce_vol_load(struct spdk_reduce_backing_dev *backing_dev,
     808             :                      spdk_reduce_vol_op_with_handle_complete cb_fn, void *cb_arg)
     809             : {
     810          11 :         struct spdk_reduce_vol *vol;
     811          11 :         struct reduce_init_load_ctx *load_ctx;
     812             : 
     813          22 :         if (backing_dev->readv == NULL || backing_dev->writev == NULL ||
     814          11 :             backing_dev->unmap == NULL) {
     815           0 :                 SPDK_ERRLOG("backing_dev function pointer not specified\n");
     816           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
     817           0 :                 return;
     818             :         }
     819             : 
     820          11 :         vol = calloc(1, sizeof(*vol));
     821          11 :         if (vol == NULL) {
     822           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
     823           0 :                 return;
     824             :         }
     825             : 
     826          11 :         TAILQ_INIT(&vol->free_requests);
     827          11 :         TAILQ_INIT(&vol->executing_requests);
     828          11 :         TAILQ_INIT(&vol->queued_requests);
     829             : 
     830          11 :         vol->backing_super = spdk_zmalloc(sizeof(*vol->backing_super), 64, NULL,
     831             :                                           SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
     832          11 :         if (vol->backing_super == NULL) {
     833           0 :                 _init_load_cleanup(vol, NULL);
     834           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
     835           0 :                 return;
     836             :         }
     837             : 
     838          11 :         vol->backing_dev = backing_dev;
     839             : 
     840          11 :         load_ctx = calloc(1, sizeof(*load_ctx));
     841          11 :         if (load_ctx == NULL) {
     842           0 :                 _init_load_cleanup(vol, NULL);
     843           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
     844           0 :                 return;
     845             :         }
     846             : 
     847          11 :         load_ctx->path = spdk_zmalloc(REDUCE_PATH_MAX, 64, NULL,
     848             :                                       SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
     849          11 :         if (load_ctx->path == NULL) {
     850           0 :                 _init_load_cleanup(vol, load_ctx);
     851           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
     852           0 :                 return;
     853             :         }
     854             : 
     855          11 :         load_ctx->vol = vol;
     856          11 :         load_ctx->cb_fn = cb_fn;
     857          11 :         load_ctx->cb_arg = cb_arg;
     858             : 
     859          11 :         load_ctx->iov[0].iov_base = vol->backing_super;
     860          11 :         load_ctx->iov[0].iov_len = sizeof(*vol->backing_super);
     861          11 :         load_ctx->iov[1].iov_base = load_ctx->path;
     862          11 :         load_ctx->iov[1].iov_len = REDUCE_PATH_MAX;
     863          11 :         load_ctx->backing_cb_args.cb_fn = _load_read_super_and_path_cpl;
     864          11 :         load_ctx->backing_cb_args.cb_arg = load_ctx;
     865          22 :         vol->backing_dev->readv(vol->backing_dev, load_ctx->iov, LOAD_IOV_COUNT, 0,
     866          11 :                                 (sizeof(*vol->backing_super) + REDUCE_PATH_MAX) /
     867          11 :                                 vol->backing_dev->blocklen,
     868          11 :                                 &load_ctx->backing_cb_args);
     869          11 : }
     870             : 
     871             : void
     872          24 : spdk_reduce_vol_unload(struct spdk_reduce_vol *vol,
     873             :                        spdk_reduce_vol_op_complete cb_fn, void *cb_arg)
     874             : {
     875          24 :         if (vol == NULL) {
     876             :                 /* This indicates a programming error. */
     877           0 :                 assert(false);
     878             :                 cb_fn(cb_arg, -EINVAL);
     879             :                 return;
     880             :         }
     881             : 
     882          24 :         if (--g_vol_count == 0) {
     883          22 :                 spdk_free(g_zero_buf);
     884          22 :         }
     885          24 :         assert(g_vol_count >= 0);
     886          24 :         _init_load_cleanup(vol, NULL);
     887          24 :         cb_fn(cb_arg, 0);
     888          24 : }
     889             : 
/* Context carried through the multi-step volume destroy sequence:
 * load -> zero superblock -> unload -> unlink pm file.
 */
struct reduce_destroy_ctx {
	spdk_reduce_vol_op_complete		cb_fn;		/* caller's completion callback */
	void					*cb_arg;	/* argument for cb_fn */
	struct spdk_reduce_vol			*vol;		/* volume being destroyed */
	struct spdk_reduce_vol_superblock	*super;		/* zero-filled buffer written over the on-disk superblock */
	struct iovec				iov;		/* single iov for the superblock overwrite */
	struct spdk_reduce_vol_cb_args		backing_cb_args; /* completion context for the backing-dev write */
	int					reduce_errno;	/* result of the superblock zeroing step */
	char					pm_path[REDUCE_PATH_MAX]; /* pm file path saved before the vol is freed */
};
     900             : 
     901             : static void
     902           1 : destroy_unload_cpl(void *cb_arg, int reduce_errno)
     903             : {
     904           1 :         struct reduce_destroy_ctx *destroy_ctx = cb_arg;
     905             : 
     906           1 :         if (destroy_ctx->reduce_errno == 0) {
     907           1 :                 if (unlink(destroy_ctx->pm_path)) {
     908           0 :                         SPDK_ERRLOG("%s could not be unlinked: %s\n",
     909             :                                     destroy_ctx->pm_path, strerror(errno));
     910           0 :                 }
     911           1 :         }
     912             : 
     913             :         /* Even if the unload somehow failed, we still pass the destroy_ctx
     914             :          * reduce_errno since that indicates whether or not the volume was
     915             :          * actually destroyed.
     916             :          */
     917           1 :         destroy_ctx->cb_fn(destroy_ctx->cb_arg, destroy_ctx->reduce_errno);
     918           1 :         spdk_free(destroy_ctx->super);
     919           1 :         free(destroy_ctx);
     920           1 : }
     921             : 
     922             : static void
     923           1 : _destroy_zero_super_cpl(void *cb_arg, int reduce_errno)
     924             : {
     925           1 :         struct reduce_destroy_ctx *destroy_ctx = cb_arg;
     926           1 :         struct spdk_reduce_vol *vol = destroy_ctx->vol;
     927             : 
     928           1 :         destroy_ctx->reduce_errno = reduce_errno;
     929           1 :         spdk_reduce_vol_unload(vol, destroy_unload_cpl, destroy_ctx);
     930           1 : }
     931             : 
     932             : static void
     933           1 : destroy_load_cb(void *cb_arg, struct spdk_reduce_vol *vol, int reduce_errno)
     934             : {
     935           1 :         struct reduce_destroy_ctx *destroy_ctx = cb_arg;
     936             : 
     937           1 :         if (reduce_errno != 0) {
     938           0 :                 destroy_ctx->cb_fn(destroy_ctx->cb_arg, reduce_errno);
     939           0 :                 spdk_free(destroy_ctx->super);
     940           0 :                 free(destroy_ctx);
     941           0 :                 return;
     942             :         }
     943             : 
     944           1 :         destroy_ctx->vol = vol;
     945           1 :         memcpy(destroy_ctx->pm_path, vol->pm_file.path, sizeof(destroy_ctx->pm_path));
     946           1 :         destroy_ctx->iov.iov_base = destroy_ctx->super;
     947           1 :         destroy_ctx->iov.iov_len = sizeof(*destroy_ctx->super);
     948           1 :         destroy_ctx->backing_cb_args.cb_fn = _destroy_zero_super_cpl;
     949           1 :         destroy_ctx->backing_cb_args.cb_arg = destroy_ctx;
     950           2 :         vol->backing_dev->writev(vol->backing_dev, &destroy_ctx->iov, 1, 0,
     951           1 :                                  sizeof(*destroy_ctx->super) / vol->backing_dev->blocklen,
     952           1 :                                  &destroy_ctx->backing_cb_args);
     953           1 : }
     954             : 
     955             : void
     956           1 : spdk_reduce_vol_destroy(struct spdk_reduce_backing_dev *backing_dev,
     957             :                         spdk_reduce_vol_op_complete cb_fn, void *cb_arg)
     958             : {
     959           1 :         struct reduce_destroy_ctx *destroy_ctx;
     960             : 
     961           1 :         destroy_ctx = calloc(1, sizeof(*destroy_ctx));
     962           1 :         if (destroy_ctx == NULL) {
     963           0 :                 cb_fn(cb_arg, -ENOMEM);
     964           0 :                 return;
     965             :         }
     966             : 
     967           1 :         destroy_ctx->super = spdk_zmalloc(sizeof(*destroy_ctx->super), 64, NULL,
     968             :                                           SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
     969           1 :         if (destroy_ctx->super == NULL) {
     970           0 :                 free(destroy_ctx);
     971           0 :                 cb_fn(cb_arg, -ENOMEM);
     972           0 :                 return;
     973             :         }
     974           1 :         destroy_ctx->cb_fn = cb_fn;
     975           1 :         destroy_ctx->cb_arg = cb_arg;
     976           1 :         spdk_reduce_vol_load(backing_dev, destroy_load_cb, destroy_ctx);
     977           1 : }
     978             : 
     979             : static bool
     980         273 : _request_spans_chunk_boundary(struct spdk_reduce_vol *vol, uint64_t offset, uint64_t length)
     981             : {
     982         273 :         uint64_t start_chunk, end_chunk;
     983             : 
     984         273 :         start_chunk = offset / vol->logical_blocks_per_chunk;
     985         273 :         end_chunk = (offset + length - 1) / vol->logical_blocks_per_chunk;
     986             : 
     987         546 :         return (start_chunk != end_chunk);
     988         273 : }
     989             : 
/* Per-step completion callback used to chain the stages of a vol request. */
typedef void (*reduce_request_fn)(void *_req, int reduce_errno);
     991             : 
/* Complete an executing request: invoke its callback, retire it from the
 * executing list, and restart at most one queued request that was waiting on
 * the same logical chunk, then return the request to the free list.
 */
static void
_reduce_vol_complete_req(struct spdk_reduce_vol_request *req, int reduce_errno)
{
	struct spdk_reduce_vol_request *next_req;
	struct spdk_reduce_vol *vol = req->vol;

	req->cb_fn(req->cb_arg, reduce_errno);
	TAILQ_REMOVE(&vol->executing_requests, req, tailq);

	/* Requests to the same logical chunk are serialized; now that this one
	 * is done, start the first queued request with the same
	 * logical_map_index (if any).
	 */
	TAILQ_FOREACH(next_req, &vol->queued_requests, tailq) {
		if (next_req->logical_map_index == req->logical_map_index) {
			TAILQ_REMOVE(&vol->queued_requests, next_req, tailq);
			if (next_req->type == REDUCE_IO_READV) {
				_start_readv_request(next_req);
			} else {
				assert(next_req->type == REDUCE_IO_WRITEV);
				_start_writev_request(next_req);
			}
			/* Must break here: next_req was removed mid-iteration,
			 * and only one request per chunk may run at a time.
			 */
			break;
		}
	}

	TAILQ_INSERT_HEAD(&vol->free_requests, req, tailq);
}
    1016             : 
/* Completion callback for the backing-device writes of a write request's
 * chunk data.  Waits until all outstanding backing ops have completed, frees
 * the backing io units and chunk map previously owned by this logical chunk,
 * then persists the new chunk map followed by the logical map update - in
 * that order, so the logical map never points at an unpersisted chunk map.
 */
static void
_write_write_done(void *_req, int reduce_errno)
{
	struct spdk_reduce_vol_request *req = _req;
	struct spdk_reduce_vol *vol = req->vol;
	uint64_t old_chunk_map_index;
	struct spdk_reduce_chunk_map *old_chunk;
	uint32_t i;

	/* Remember the first failure; completion is still deferred until every
	 * backing op has called back.
	 */
	if (reduce_errno != 0) {
		req->reduce_errno = reduce_errno;
	}

	assert(req->num_backing_ops > 0);
	if (--req->num_backing_ops > 0) {
		return;
	}

	if (req->reduce_errno != 0) {
		_reduce_vol_complete_req(req, req->reduce_errno);
		return;
	}

	/* Release the chunk previously mapped at this logical index, if any:
	 * clear its backing io unit bits and its chunk map bit.
	 */
	old_chunk_map_index = vol->pm_logical_map[req->logical_map_index];
	if (old_chunk_map_index != REDUCE_EMPTY_MAP_ENTRY) {
		old_chunk = _reduce_vol_get_chunk_map(vol, old_chunk_map_index);
		for (i = 0; i < vol->backing_io_units_per_chunk; i++) {
			if (old_chunk->io_unit_index[i] == REDUCE_EMPTY_MAP_ENTRY) {
				/* Stop at the first empty slot - the remaining
				 * entries are treated as unused as well.
				 */
				break;
			}
			assert(spdk_bit_array_get(vol->allocated_backing_io_units, old_chunk->io_unit_index[i]) == true);
			spdk_bit_array_clear(vol->allocated_backing_io_units, old_chunk->io_unit_index[i]);
			old_chunk->io_unit_index[i] = REDUCE_EMPTY_MAP_ENTRY;
		}
		spdk_bit_array_clear(vol->allocated_chunk_maps, old_chunk_map_index);
	}

	/*
	 * We don't need to persist the clearing of the old chunk map here.  The old chunk map
	 * becomes invalid after we update the logical map, since the old chunk map will no
	 * longer have a reference to it in the logical map.
	 */

	/* Persist the new chunk map.  This must be persisted before we update the logical map. */
	_reduce_persist(vol, req->chunk,
			_reduce_vol_get_chunk_struct_size(vol->backing_io_units_per_chunk));

	vol->pm_logical_map[req->logical_map_index] = req->chunk_map_index;

	_reduce_persist(vol, &vol->pm_logical_map[req->logical_map_index], sizeof(uint64_t));

	_reduce_vol_complete_req(req, 0);
}
    1070             : 
    1071             : static void
    1072         274 : _issue_backing_ops(struct spdk_reduce_vol_request *req, struct spdk_reduce_vol *vol,
    1073             :                    reduce_request_fn next_fn, bool is_write)
    1074             : {
    1075         274 :         struct iovec *iov;
    1076         274 :         uint8_t *buf;
    1077         274 :         uint32_t i;
    1078             : 
    1079         274 :         if (req->chunk_is_compressed) {
    1080         270 :                 iov = req->comp_buf_iov;
    1081         270 :                 buf = req->comp_buf;
    1082         270 :         } else {
    1083           4 :                 iov = req->decomp_buf_iov;
    1084           4 :                 buf = req->decomp_buf;
    1085             :         }
    1086             : 
    1087         274 :         req->num_backing_ops = req->num_io_units;
    1088         274 :         req->backing_cb_args.cb_fn = next_fn;
    1089         274 :         req->backing_cb_args.cb_arg = req;
    1090         560 :         for (i = 0; i < req->num_io_units; i++) {
    1091         286 :                 iov[i].iov_base = buf + i * vol->params.backing_io_unit_size;
    1092         286 :                 iov[i].iov_len = vol->params.backing_io_unit_size;
    1093         286 :                 if (is_write) {
    1094          50 :                         vol->backing_dev->writev(vol->backing_dev, &iov[i], 1,
    1095          25 :                                                  req->chunk->io_unit_index[i] * vol->backing_lba_per_io_unit,
    1096          25 :                                                  vol->backing_lba_per_io_unit, &req->backing_cb_args);
    1097          25 :                 } else {
    1098         522 :                         vol->backing_dev->readv(vol->backing_dev, &iov[i], 1,
    1099         261 :                                                 req->chunk->io_unit_index[i] * vol->backing_lba_per_io_unit,
    1100         261 :                                                 vol->backing_lba_per_io_unit, &req->backing_cb_args);
    1101             :                 }
    1102         286 :         }
    1103         274 : }
    1104             : 
    1105             : static void
    1106          13 : _reduce_vol_write_chunk(struct spdk_reduce_vol_request *req, reduce_request_fn next_fn,
    1107             :                         uint32_t compressed_size)
    1108             : {
    1109          13 :         struct spdk_reduce_vol *vol = req->vol;
    1110          13 :         uint32_t i;
    1111          13 :         uint64_t chunk_offset, remainder, total_len = 0;
    1112          13 :         uint8_t *buf;
    1113          13 :         int j;
    1114             : 
    1115          13 :         req->chunk_map_index = spdk_bit_array_find_first_clear(vol->allocated_chunk_maps, 0);
    1116             : 
    1117             :         /* TODO: fail if no chunk map found - but really this should not happen if we
    1118             :          * size the number of requests similarly to number of extra chunk maps
    1119             :          */
    1120          13 :         assert(req->chunk_map_index != UINT32_MAX);
    1121          13 :         spdk_bit_array_set(vol->allocated_chunk_maps, req->chunk_map_index);
    1122             : 
    1123          13 :         req->chunk = _reduce_vol_get_chunk_map(vol, req->chunk_map_index);
    1124          26 :         req->num_io_units = spdk_divide_round_up(compressed_size,
    1125          13 :                             vol->params.backing_io_unit_size);
    1126          13 :         req->chunk_is_compressed = (req->num_io_units != vol->backing_io_units_per_chunk);
    1127          13 :         req->chunk->compressed_size =
    1128          13 :                 req->chunk_is_compressed ? compressed_size : vol->params.chunk_size;
    1129             : 
    1130             :         /* if the chunk is uncompressed we need to copy the data from the host buffers. */
    1131          13 :         if (req->chunk_is_compressed == false) {
    1132           4 :                 chunk_offset = req->offset % vol->logical_blocks_per_chunk;
    1133           4 :                 buf = req->decomp_buf;
    1134           4 :                 total_len = chunk_offset * vol->params.logical_block_size;
    1135             : 
    1136             :                 /* zero any offset into chunk */
    1137           4 :                 if (req->rmw == false && chunk_offset) {
    1138           0 :                         memset(buf, 0, total_len);
    1139           0 :                 }
    1140           4 :                 buf += total_len;
    1141             : 
    1142             :                 /* copy the data */
    1143           8 :                 for (j = 0; j < req->iovcnt; j++) {
    1144           4 :                         memcpy(buf, req->iov[j].iov_base, req->iov[j].iov_len);
    1145           4 :                         buf += req->iov[j].iov_len;
    1146           4 :                         total_len += req->iov[j].iov_len;
    1147           4 :                 }
    1148             : 
    1149             :                 /* zero any remainder */
    1150           4 :                 remainder = vol->params.chunk_size - total_len;
    1151           4 :                 total_len += remainder;
    1152           4 :                 if (req->rmw == false && remainder) {
    1153           0 :                         memset(buf, 0, remainder);
    1154           0 :                 }
    1155           4 :                 assert(total_len == vol->params.chunk_size);
    1156           4 :         }
    1157             : 
    1158          38 :         for (i = 0; i < req->num_io_units; i++) {
    1159          25 :                 req->chunk->io_unit_index[i] = spdk_bit_array_find_first_clear(vol->allocated_backing_io_units, 0);
    1160             :                 /* TODO: fail if no backing block found - but really this should also not
    1161             :                  * happen (see comment above).
    1162             :                  */
    1163          25 :                 assert(req->chunk->io_unit_index[i] != UINT32_MAX);
    1164          25 :                 spdk_bit_array_set(vol->allocated_backing_io_units, req->chunk->io_unit_index[i]);
    1165          25 :         }
    1166             : 
    1167          13 :         _issue_backing_ops(req, vol, next_fn, true /* write */);
    1168          13 : }
    1169             : 
    1170             : static void
    1171          13 : _write_compress_done(void *_req, int reduce_errno)
    1172             : {
    1173          13 :         struct spdk_reduce_vol_request *req = _req;
    1174             : 
    1175             :         /* Negative reduce_errno indicates failure for compression operations.
    1176             :          * Just write the uncompressed data instead.  Force this to happen
    1177             :          * by just passing the full chunk size to _reduce_vol_write_chunk.
    1178             :          * When it sees the data couldn't be compressed, it will just write
    1179             :          * the uncompressed buffer to disk.
    1180             :          */
    1181          13 :         if (reduce_errno < 0) {
    1182           4 :                 req->backing_cb_args.output_size = req->vol->params.chunk_size;
    1183           4 :         }
    1184             : 
    1185          13 :         _reduce_vol_write_chunk(req, _write_write_done, req->backing_cb_args.output_size);
    1186          13 : }
    1187             : 
    1188             : static void
    1189          13 : _reduce_vol_compress_chunk(struct spdk_reduce_vol_request *req, reduce_request_fn next_fn)
    1190             : {
    1191          13 :         struct spdk_reduce_vol *vol = req->vol;
    1192             : 
    1193          13 :         req->backing_cb_args.cb_fn = next_fn;
    1194          13 :         req->backing_cb_args.cb_arg = req;
    1195          13 :         req->comp_buf_iov[0].iov_base = req->comp_buf;
    1196          13 :         req->comp_buf_iov[0].iov_len = vol->params.chunk_size;
    1197          26 :         vol->backing_dev->compress(vol->backing_dev,
    1198          13 :                                    req->decomp_iov, req->decomp_iovcnt, req->comp_buf_iov, 1,
    1199          13 :                                    &req->backing_cb_args);
    1200          13 : }
    1201             : 
    1202             : static void
    1203           3 : _reduce_vol_decompress_chunk_scratch(struct spdk_reduce_vol_request *req, reduce_request_fn next_fn)
    1204             : {
    1205           3 :         struct spdk_reduce_vol *vol = req->vol;
    1206             : 
    1207           3 :         req->backing_cb_args.cb_fn = next_fn;
    1208           3 :         req->backing_cb_args.cb_arg = req;
    1209           3 :         req->comp_buf_iov[0].iov_base = req->comp_buf;
    1210           3 :         req->comp_buf_iov[0].iov_len = req->chunk->compressed_size;
    1211           3 :         req->decomp_buf_iov[0].iov_base = req->decomp_buf;
    1212           3 :         req->decomp_buf_iov[0].iov_len = vol->params.chunk_size;
    1213           6 :         vol->backing_dev->decompress(vol->backing_dev,
    1214           3 :                                      req->comp_buf_iov, 1, req->decomp_buf_iov, 1,
    1215           3 :                                      &req->backing_cb_args);
    1216           3 : }
    1217             : 
    1218             : static void
    1219         266 : _reduce_vol_decompress_chunk(struct spdk_reduce_vol_request *req, reduce_request_fn next_fn)
    1220             : {
    1221         266 :         struct spdk_reduce_vol *vol = req->vol;
    1222         266 :         uint64_t chunk_offset, remainder = 0;
    1223         266 :         uint64_t ttl_len = 0;
    1224         266 :         size_t iov_len;
    1225         266 :         int i;
    1226             : 
    1227         266 :         req->decomp_iovcnt = 0;
    1228         266 :         chunk_offset = req->offset % vol->logical_blocks_per_chunk;
    1229             : 
    1230             :         /* If backing device doesn't support SGL output then we should copy the result of decompression to user's buffer
    1231             :          * if at least one of the conditions below is true:
    1232             :          * 1. User's buffer is fragmented
    1233             :          * 2. Length of the user's buffer is less than the chunk
    1234             :          * 3. User's buffer is contig, equals chunk_size but crosses huge page boundary */
    1235         266 :         iov_len = req->iov[0].iov_len;
    1236         271 :         req->copy_after_decompress = !vol->backing_dev->sgl_out && (req->iovcnt > 1 ||
    1237           4 :                                      req->iov[0].iov_len < vol->params.chunk_size ||
    1238           2 :                                      _addr_crosses_huge_page(req->iov[0].iov_base, &iov_len));
    1239         266 :         if (req->copy_after_decompress) {
    1240           4 :                 req->decomp_iov[0].iov_base = req->decomp_buf;
    1241           4 :                 req->decomp_iov[0].iov_len = vol->params.chunk_size;
    1242           4 :                 req->decomp_iovcnt = 1;
    1243           4 :                 goto decompress;
    1244             :         }
    1245             : 
    1246         262 :         if (chunk_offset) {
    1247             :                 /* first iov point to our scratch buffer for any offset into the chunk */
    1248         249 :                 req->decomp_iov[0].iov_base = req->decomp_buf;
    1249         249 :                 req->decomp_iov[0].iov_len = chunk_offset * vol->params.logical_block_size;
    1250         249 :                 ttl_len += req->decomp_iov[0].iov_len;
    1251         249 :                 req->decomp_iovcnt = 1;
    1252         249 :         }
    1253             : 
    1254             :         /* now the user data iov, direct to the user buffer */
    1255         527 :         for (i = 0; i < req->iovcnt; i++) {
    1256         265 :                 req->decomp_iov[i + req->decomp_iovcnt].iov_base = req->iov[i].iov_base;
    1257         265 :                 req->decomp_iov[i + req->decomp_iovcnt].iov_len = req->iov[i].iov_len;
    1258         265 :                 ttl_len += req->decomp_iov[i + req->decomp_iovcnt].iov_len;
    1259         265 :         }
    1260         262 :         req->decomp_iovcnt += req->iovcnt;
    1261             : 
    1262             :         /* send the rest of the chunk to our scratch buffer */
    1263         262 :         remainder = vol->params.chunk_size - ttl_len;
    1264         262 :         if (remainder) {
    1265         252 :                 req->decomp_iov[req->decomp_iovcnt].iov_base = req->decomp_buf + ttl_len;
    1266         252 :                 req->decomp_iov[req->decomp_iovcnt].iov_len = remainder;
    1267         252 :                 ttl_len += req->decomp_iov[req->decomp_iovcnt].iov_len;
    1268         252 :                 req->decomp_iovcnt++;
    1269         252 :         }
    1270         262 :         assert(ttl_len == vol->params.chunk_size);
    1271             : 
    1272             : decompress:
    1273         266 :         assert(!req->copy_after_decompress || (req->copy_after_decompress && req->decomp_iovcnt == 1));
    1274         266 :         req->backing_cb_args.cb_fn = next_fn;
    1275         266 :         req->backing_cb_args.cb_arg = req;
    1276         266 :         req->comp_buf_iov[0].iov_base = req->comp_buf;
    1277         266 :         req->comp_buf_iov[0].iov_len = req->chunk->compressed_size;
    1278         532 :         vol->backing_dev->decompress(vol->backing_dev,
    1279         266 :                                      req->comp_buf_iov, 1, req->decomp_iov, req->decomp_iovcnt,
    1280         266 :                                      &req->backing_cb_args);
    1281         266 : }
    1282             : 
    1283             : static inline void
    1284           8 : _prepare_compress_chunk_copy_user_buffers(struct spdk_reduce_vol_request *req, bool zero_paddings)
    1285             : {
    1286           8 :         struct spdk_reduce_vol *vol = req->vol;
    1287           8 :         char *padding_buffer = zero_paddings ? g_zero_buf : req->decomp_buf;
    1288           8 :         uint64_t chunk_offset, ttl_len = 0;
    1289           8 :         uint64_t remainder = 0;
    1290           8 :         char *copy_offset = NULL;
    1291           8 :         uint32_t lbsize = vol->params.logical_block_size;
    1292           8 :         int i;
    1293             : 
    1294           8 :         req->decomp_iov[0].iov_base = req->decomp_buf;
    1295           8 :         req->decomp_iov[0].iov_len = vol->params.chunk_size;
    1296           8 :         req->decomp_iovcnt = 1;
    1297           8 :         copy_offset = req->decomp_iov[0].iov_base;
    1298           8 :         chunk_offset = req->offset % vol->logical_blocks_per_chunk;
    1299             : 
    1300           8 :         if (chunk_offset) {
    1301           2 :                 ttl_len += chunk_offset * lbsize;
    1302             :                 /* copy_offset already points to padding buffer if zero_paddings=false */
    1303           2 :                 if (zero_paddings) {
    1304           1 :                         memcpy(copy_offset, padding_buffer, ttl_len);
    1305           1 :                 }
    1306           2 :                 copy_offset += ttl_len;
    1307           2 :         }
    1308             : 
    1309             :         /* now the user data iov, direct from the user buffer */
    1310          22 :         for (i = 0; i < req->iovcnt; i++) {
    1311          14 :                 memcpy(copy_offset, req->iov[i].iov_base, req->iov[i].iov_len);
    1312          14 :                 copy_offset += req->iov[i].iov_len;
    1313          14 :                 ttl_len += req->iov[i].iov_len;
    1314          14 :         }
    1315             : 
    1316           8 :         remainder = vol->params.chunk_size - ttl_len;
    1317           8 :         if (remainder) {
    1318             :                 /* copy_offset already points to padding buffer if zero_paddings=false */
    1319           4 :                 if (zero_paddings) {
    1320           2 :                         memcpy(copy_offset, padding_buffer + ttl_len, remainder);
    1321           2 :                 }
    1322           4 :                 ttl_len += remainder;
    1323           4 :         }
    1324             : 
    1325           8 :         assert(ttl_len == req->vol->params.chunk_size);
    1326           8 : }
    1327             : 
    1328             : /* This function can be called when we are compressing a new data or in case of read-modify-write
    1329             :  * In the first case possible paddings should be filled with zeroes, in the second case the paddings
    1330             :  * should point to already read and decompressed buffer */
    1331             : static inline void
    1332          30 : _prepare_compress_chunk(struct spdk_reduce_vol_request *req, bool zero_paddings)
    1333             : {
    1334          30 :         struct spdk_reduce_vol *vol = req->vol;
    1335          30 :         char *padding_buffer = zero_paddings ? g_zero_buf : req->decomp_buf;
    1336          30 :         uint64_t chunk_offset, ttl_len = 0;
    1337          30 :         uint64_t remainder = 0;
    1338          30 :         uint32_t lbsize = vol->params.logical_block_size;
    1339          30 :         size_t iov_len;
    1340          30 :         int i;
    1341             : 
    1342             :         /* If backing device doesn't support SGL input then we should copy user's buffer into decomp_buf
    1343             :          * if at least one of the conditions below is true:
    1344             :          * 1. User's buffer is fragmented
    1345             :          * 2. Length of the user's buffer is less than the chunk
    1346             :          * 3. User's buffer is contig, equals chunk_size but crosses huge page boundary */
    1347          30 :         iov_len = req->iov[0].iov_len;
    1348          34 :         if (!vol->backing_dev->sgl_in && (req->iovcnt > 1 ||
    1349           4 :                                           req->iov[0].iov_len < vol->params.chunk_size ||
    1350           4 :                                           _addr_crosses_huge_page(req->iov[0].iov_base, &iov_len))) {
    1351           8 :                 _prepare_compress_chunk_copy_user_buffers(req, zero_paddings);
    1352           8 :                 return;
    1353             :         }
    1354             : 
    1355          22 :         req->decomp_iovcnt = 0;
    1356          22 :         chunk_offset = req->offset % vol->logical_blocks_per_chunk;
    1357             : 
    1358          22 :         if (chunk_offset != 0) {
    1359          10 :                 ttl_len += chunk_offset * lbsize;
    1360          10 :                 req->decomp_iov[0].iov_base = padding_buffer;
    1361          10 :                 req->decomp_iov[0].iov_len = ttl_len;
    1362          10 :                 req->decomp_iovcnt = 1;
    1363          10 :         }
    1364             : 
    1365             :         /* now the user data iov, direct from the user buffer */
    1366          51 :         for (i = 0; i < req->iovcnt; i++) {
    1367          29 :                 req->decomp_iov[i + req->decomp_iovcnt].iov_base = req->iov[i].iov_base;
    1368          29 :                 req->decomp_iov[i + req->decomp_iovcnt].iov_len = req->iov[i].iov_len;
    1369          29 :                 ttl_len += req->iov[i].iov_len;
    1370          29 :         }
    1371          22 :         req->decomp_iovcnt += req->iovcnt;
    1372             : 
    1373          22 :         remainder = vol->params.chunk_size - ttl_len;
    1374          22 :         if (remainder) {
    1375          14 :                 req->decomp_iov[req->decomp_iovcnt].iov_base = padding_buffer + ttl_len;
    1376          14 :                 req->decomp_iov[req->decomp_iovcnt].iov_len = remainder;
    1377          14 :                 req->decomp_iovcnt++;
    1378          14 :                 ttl_len += remainder;
    1379          14 :         }
    1380          22 :         assert(ttl_len == req->vol->params.chunk_size);
    1381          30 : }
    1382             : 
    1383             : static void
    1384           3 : _write_decompress_done(void *_req, int reduce_errno)
    1385             : {
    1386           3 :         struct spdk_reduce_vol_request *req = _req;
    1387             : 
    1388             :         /* Negative reduce_errno indicates failure for compression operations. */
    1389           3 :         if (reduce_errno < 0) {
    1390           0 :                 _reduce_vol_complete_req(req, reduce_errno);
    1391           0 :                 return;
    1392             :         }
    1393             : 
    1394             :         /* Positive reduce_errno indicates that the output size field in the backing_cb_args
    1395             :          * represents the output_size.
    1396             :          */
    1397           3 :         if (req->backing_cb_args.output_size != req->vol->params.chunk_size) {
    1398           0 :                 _reduce_vol_complete_req(req, -EIO);
    1399           0 :                 return;
    1400             :         }
    1401             : 
    1402           3 :         _prepare_compress_chunk(req, false);
    1403           3 :         _reduce_vol_compress_chunk(req, _write_compress_done);
    1404           3 : }
    1405             : 
    1406             : static void
    1407           3 : _write_read_done(void *_req, int reduce_errno)
    1408             : {
    1409           3 :         struct spdk_reduce_vol_request *req = _req;
    1410             : 
    1411           3 :         if (reduce_errno != 0) {
    1412           0 :                 req->reduce_errno = reduce_errno;
    1413           0 :         }
    1414             : 
    1415           3 :         assert(req->num_backing_ops > 0);
    1416           3 :         if (--req->num_backing_ops > 0) {
    1417           0 :                 return;
    1418             :         }
    1419             : 
    1420           3 :         if (req->reduce_errno != 0) {
    1421           0 :                 _reduce_vol_complete_req(req, req->reduce_errno);
    1422           0 :                 return;
    1423             :         }
    1424             : 
    1425           3 :         if (req->chunk_is_compressed) {
    1426           3 :                 _reduce_vol_decompress_chunk_scratch(req, _write_decompress_done);
    1427           3 :         } else {
    1428           0 :                 _write_decompress_done(req, req->chunk->compressed_size);
    1429             :         }
    1430           3 : }
    1431             : 
    1432             : static void
    1433         266 : _read_decompress_done(void *_req, int reduce_errno)
    1434             : {
    1435         266 :         struct spdk_reduce_vol_request *req = _req;
    1436         266 :         struct spdk_reduce_vol *vol = req->vol;
    1437             : 
    1438             :         /* Negative reduce_errno indicates failure for compression operations. */
    1439         266 :         if (reduce_errno < 0) {
    1440           0 :                 _reduce_vol_complete_req(req, reduce_errno);
    1441           0 :                 return;
    1442             :         }
    1443             : 
    1444             :         /* Positive reduce_errno indicates that the output size field in the backing_cb_args
    1445             :          * represents the output_size.
    1446             :          */
    1447         266 :         if (req->backing_cb_args.output_size != vol->params.chunk_size) {
    1448           0 :                 _reduce_vol_complete_req(req, -EIO);
    1449           0 :                 return;
    1450             :         }
    1451             : 
    1452         266 :         if (req->copy_after_decompress) {
    1453           4 :                 uint64_t chunk_offset = req->offset % vol->logical_blocks_per_chunk;
    1454           4 :                 char *decomp_buffer = (char *)req->decomp_buf + chunk_offset * vol->params.logical_block_size;
    1455           4 :                 int i;
    1456             : 
    1457          11 :                 for (i = 0; i < req->iovcnt; i++) {
    1458           7 :                         memcpy(req->iov[i].iov_base, decomp_buffer, req->iov[i].iov_len);
    1459           7 :                         decomp_buffer += req->iov[i].iov_len;
    1460           7 :                         assert(decomp_buffer <= (char *)req->decomp_buf + vol->params.chunk_size);
    1461           7 :                 }
    1462           4 :         }
    1463             : 
    1464         266 :         _reduce_vol_complete_req(req, 0);
    1465         266 : }
    1466             : 
    1467             : static void
    1468         258 : _read_read_done(void *_req, int reduce_errno)
    1469             : {
    1470         258 :         struct spdk_reduce_vol_request *req = _req;
    1471         258 :         uint64_t chunk_offset;
    1472         258 :         uint8_t *buf;
    1473         258 :         int i;
    1474             : 
    1475         258 :         if (reduce_errno != 0) {
    1476           0 :                 req->reduce_errno = reduce_errno;
    1477           0 :         }
    1478             : 
    1479         258 :         assert(req->num_backing_ops > 0);
    1480         258 :         if (--req->num_backing_ops > 0) {
    1481           0 :                 return;
    1482             :         }
    1483             : 
    1484         258 :         if (req->reduce_errno != 0) {
    1485           0 :                 _reduce_vol_complete_req(req, req->reduce_errno);
    1486           0 :                 return;
    1487             :         }
    1488             : 
    1489         258 :         if (req->chunk_is_compressed) {
    1490         258 :                 _reduce_vol_decompress_chunk(req, _read_decompress_done);
    1491         258 :         } else {
    1492             : 
    1493             :                 /* If the chunk was compressed, the data would have been sent to the
    1494             :                  *  host buffers by the decompression operation, if not we need to memcpy here.
    1495             :                  */
    1496           0 :                 chunk_offset = req->offset % req->vol->logical_blocks_per_chunk;
    1497           0 :                 buf = req->decomp_buf + chunk_offset * req->vol->params.logical_block_size;
    1498           0 :                 for (i = 0; i < req->iovcnt; i++) {
    1499           0 :                         memcpy(req->iov[i].iov_base, buf, req->iov[i].iov_len);
    1500           0 :                         buf += req->iov[i].iov_len;
    1501           0 :                 }
    1502             : 
    1503           0 :                 _read_decompress_done(req, req->chunk->compressed_size);
    1504             :         }
    1505         258 : }
    1506             : 
    1507             : static void
    1508         261 : _reduce_vol_read_chunk(struct spdk_reduce_vol_request *req, reduce_request_fn next_fn)
    1509             : {
    1510         261 :         struct spdk_reduce_vol *vol = req->vol;
    1511             : 
    1512         261 :         req->chunk_map_index = vol->pm_logical_map[req->logical_map_index];
    1513         261 :         assert(req->chunk_map_index != UINT32_MAX);
    1514             : 
    1515         261 :         req->chunk = _reduce_vol_get_chunk_map(vol, req->chunk_map_index);
    1516         522 :         req->num_io_units = spdk_divide_round_up(req->chunk->compressed_size,
    1517         261 :                             vol->params.backing_io_unit_size);
    1518         261 :         req->chunk_is_compressed = (req->num_io_units != vol->backing_io_units_per_chunk);
    1519             : 
    1520         261 :         _issue_backing_ops(req, vol, next_fn, false /* read */);
    1521         261 : }
    1522             : 
    1523             : static bool
    1524         271 : _iov_array_is_valid(struct spdk_reduce_vol *vol, struct iovec *iov, int iovcnt,
    1525             :                     uint64_t length)
    1526             : {
    1527         271 :         uint64_t size = 0;
    1528         271 :         int i;
    1529             : 
    1530         271 :         if (iovcnt > REDUCE_MAX_IOVECS) {
    1531           0 :                 return false;
    1532             :         }
    1533             : 
    1534         542 :         for (i = 0; i < iovcnt; i++) {
    1535         271 :                 size += iov[i].iov_len;
    1536         271 :         }
    1537             : 
    1538         271 :         return size == (length * vol->params.logical_block_size);
    1539         271 : }
    1540             : 
    1541             : static bool
    1542         271 : _check_overlap(struct spdk_reduce_vol *vol, uint64_t logical_map_index)
    1543             : {
    1544         271 :         struct spdk_reduce_vol_request *req;
    1545             : 
    1546         271 :         TAILQ_FOREACH(req, &vol->executing_requests, tailq) {
    1547           1 :                 if (logical_map_index == req->logical_map_index) {
    1548           1 :                         return true;
    1549             :                 }
    1550           0 :         }
    1551             : 
    1552         270 :         return false;
    1553         271 : }
    1554             : 
    1555             : static void
    1556         258 : _start_readv_request(struct spdk_reduce_vol_request *req)
    1557             : {
    1558         258 :         TAILQ_INSERT_TAIL(&req->vol->executing_requests, req, tailq);
    1559         258 :         _reduce_vol_read_chunk(req, _read_read_done);
    1560         258 : }
    1561             : 
    1562             : void
    1563         258 : spdk_reduce_vol_readv(struct spdk_reduce_vol *vol,
    1564             :                       struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    1565             :                       spdk_reduce_vol_op_complete cb_fn, void *cb_arg)
    1566             : {
    1567         258 :         struct spdk_reduce_vol_request *req;
    1568         258 :         uint64_t logical_map_index;
    1569         258 :         bool overlapped;
    1570         258 :         int i;
    1571             : 
    1572         258 :         if (length == 0) {
    1573           0 :                 cb_fn(cb_arg, 0);
    1574           0 :                 return;
    1575             :         }
    1576             : 
    1577         258 :         if (_request_spans_chunk_boundary(vol, offset, length)) {
    1578           0 :                 cb_fn(cb_arg, -EINVAL);
    1579           0 :                 return;
    1580             :         }
    1581             : 
    1582         258 :         if (!_iov_array_is_valid(vol, iov, iovcnt, length)) {
    1583           0 :                 cb_fn(cb_arg, -EINVAL);
    1584           0 :                 return;
    1585             :         }
    1586             : 
    1587         258 :         logical_map_index = offset / vol->logical_blocks_per_chunk;
    1588         258 :         overlapped = _check_overlap(vol, logical_map_index);
    1589             : 
    1590         258 :         if (!overlapped && vol->pm_logical_map[logical_map_index] == REDUCE_EMPTY_MAP_ENTRY) {
    1591             :                 /*
    1592             :                  * This chunk hasn't been allocated.  So treat the data as all
    1593             :                  * zeroes for this chunk - do the memset and immediately complete
    1594             :                  * the operation.
    1595             :                  */
    1596           0 :                 for (i = 0; i < iovcnt; i++) {
    1597           0 :                         memset(iov[i].iov_base, 0, iov[i].iov_len);
    1598           0 :                 }
    1599           0 :                 cb_fn(cb_arg, 0);
    1600           0 :                 return;
    1601             :         }
    1602             : 
    1603         258 :         req = TAILQ_FIRST(&vol->free_requests);
    1604         258 :         if (req == NULL) {
    1605           0 :                 cb_fn(cb_arg, -ENOMEM);
    1606           0 :                 return;
    1607             :         }
    1608             : 
    1609         258 :         TAILQ_REMOVE(&vol->free_requests, req, tailq);
    1610         258 :         req->type = REDUCE_IO_READV;
    1611         258 :         req->vol = vol;
    1612         258 :         req->iov = iov;
    1613         258 :         req->iovcnt = iovcnt;
    1614         258 :         req->offset = offset;
    1615         258 :         req->logical_map_index = logical_map_index;
    1616         258 :         req->length = length;
    1617         258 :         req->copy_after_decompress = false;
    1618         258 :         req->cb_fn = cb_fn;
    1619         258 :         req->cb_arg = cb_arg;
    1620             : 
    1621         258 :         if (!overlapped) {
    1622         258 :                 _start_readv_request(req);
    1623         258 :         } else {
    1624           0 :                 TAILQ_INSERT_TAIL(&vol->queued_requests, req, tailq);
    1625             :         }
    1626         258 : }
    1627             : 
    1628             : static void
    1629          13 : _start_writev_request(struct spdk_reduce_vol_request *req)
    1630             : {
    1631          13 :         struct spdk_reduce_vol *vol = req->vol;
    1632             : 
    1633          13 :         TAILQ_INSERT_TAIL(&req->vol->executing_requests, req, tailq);
    1634          13 :         if (vol->pm_logical_map[req->logical_map_index] != REDUCE_EMPTY_MAP_ENTRY) {
    1635           5 :                 if ((req->length * vol->params.logical_block_size) < vol->params.chunk_size) {
    1636             :                         /* Read old chunk, then overwrite with data from this write
    1637             :                          *  operation.
    1638             :                          */
    1639           3 :                         req->rmw = true;
    1640           3 :                         _reduce_vol_read_chunk(req, _write_read_done);
    1641           3 :                         return;
    1642             :                 }
    1643           2 :         }
    1644             : 
    1645          10 :         req->rmw = false;
    1646             : 
    1647          10 :         _prepare_compress_chunk(req, true);
    1648          10 :         _reduce_vol_compress_chunk(req, _write_compress_done);
    1649          13 : }
    1650             : 
    1651             : void
    1652          15 : spdk_reduce_vol_writev(struct spdk_reduce_vol *vol,
    1653             :                        struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    1654             :                        spdk_reduce_vol_op_complete cb_fn, void *cb_arg)
    1655             : {
    1656          15 :         struct spdk_reduce_vol_request *req;
    1657          15 :         uint64_t logical_map_index;
    1658          15 :         bool overlapped;
    1659             : 
    1660          15 :         if (length == 0) {
    1661           0 :                 cb_fn(cb_arg, 0);
    1662           0 :                 return;
    1663             :         }
    1664             : 
    1665          15 :         if (_request_spans_chunk_boundary(vol, offset, length)) {
    1666           2 :                 cb_fn(cb_arg, -EINVAL);
    1667           2 :                 return;
    1668             :         }
    1669             : 
    1670          13 :         if (!_iov_array_is_valid(vol, iov, iovcnt, length)) {
    1671           0 :                 cb_fn(cb_arg, -EINVAL);
    1672           0 :                 return;
    1673             :         }
    1674             : 
    1675          13 :         logical_map_index = offset / vol->logical_blocks_per_chunk;
    1676          13 :         overlapped = _check_overlap(vol, logical_map_index);
    1677             : 
    1678          13 :         req = TAILQ_FIRST(&vol->free_requests);
    1679          13 :         if (req == NULL) {
    1680           0 :                 cb_fn(cb_arg, -ENOMEM);
    1681           0 :                 return;
    1682             :         }
    1683             : 
    1684          13 :         TAILQ_REMOVE(&vol->free_requests, req, tailq);
    1685          13 :         req->type = REDUCE_IO_WRITEV;
    1686          13 :         req->vol = vol;
    1687          13 :         req->iov = iov;
    1688          13 :         req->iovcnt = iovcnt;
    1689          13 :         req->offset = offset;
    1690          13 :         req->logical_map_index = logical_map_index;
    1691          13 :         req->length = length;
    1692          13 :         req->copy_after_decompress = false;
    1693          13 :         req->cb_fn = cb_fn;
    1694          13 :         req->cb_arg = cb_arg;
    1695             : 
    1696          13 :         if (!overlapped) {
    1697          12 :                 _start_writev_request(req);
    1698          12 :         } else {
    1699           1 :                 TAILQ_INSERT_TAIL(&vol->queued_requests, req, tailq);
    1700             :         }
    1701          15 : }
    1702             : 
/* Return a read-only pointer to the volume's parameter block.
 * The memory is owned by the volume; callers must not free it.
 * NOTE(review): presumably valid only while the volume remains open — confirm.
 */
const struct spdk_reduce_vol_params *
spdk_reduce_vol_get_params(struct spdk_reduce_vol *vol)
{
	return &vol->params;
}
    1708             : 
    1709             : void
    1710           0 : spdk_reduce_vol_print_info(struct spdk_reduce_vol *vol)
    1711             : {
    1712           0 :         uint64_t logical_map_size, num_chunks, ttl_chunk_sz;
    1713           0 :         uint32_t struct_size;
    1714           0 :         uint64_t chunk_map_size;
    1715             : 
    1716           0 :         SPDK_NOTICELOG("vol info:\n");
    1717           0 :         SPDK_NOTICELOG("\tvol->params.backing_io_unit_size = 0x%x\n", vol->params.backing_io_unit_size);
    1718           0 :         SPDK_NOTICELOG("\tvol->params.logical_block_size = 0x%x\n", vol->params.logical_block_size);
    1719           0 :         SPDK_NOTICELOG("\tvol->params.chunk_size = 0x%x\n", vol->params.chunk_size);
    1720           0 :         SPDK_NOTICELOG("\tvol->params.vol_size = 0x%" PRIx64 "\n", vol->params.vol_size);
    1721           0 :         num_chunks = _get_total_chunks(vol->params.vol_size, vol->params.chunk_size);
    1722           0 :         SPDK_NOTICELOG("\ttotal chunks (including extra) = 0x%" PRIx64 "\n", num_chunks);
    1723           0 :         SPDK_NOTICELOG("\ttotal chunks (excluding extra) = 0x%" PRIx64 "\n",
    1724             :                        vol->params.vol_size / vol->params.chunk_size);
    1725           0 :         ttl_chunk_sz = _get_pm_total_chunks_size(vol->params.vol_size, vol->params.chunk_size,
    1726           0 :                         vol->params.backing_io_unit_size);
    1727           0 :         SPDK_NOTICELOG("\ttotal_chunks_size = 0x%" PRIx64 "\n", ttl_chunk_sz);
    1728           0 :         struct_size = _reduce_vol_get_chunk_struct_size(vol->backing_io_units_per_chunk);
    1729           0 :         SPDK_NOTICELOG("\tchunk_struct_size = 0x%x\n", struct_size);
    1730             : 
    1731           0 :         SPDK_NOTICELOG("pmem info:\n");
    1732           0 :         SPDK_NOTICELOG("\tvol->pm_file.size = 0x%" PRIx64 "\n", vol->pm_file.size);
    1733           0 :         SPDK_NOTICELOG("\tvol->pm_file.pm_buf = %p\n", (void *)vol->pm_file.pm_buf);
    1734           0 :         SPDK_NOTICELOG("\tvol->pm_super = %p\n", (void *)vol->pm_super);
    1735           0 :         SPDK_NOTICELOG("\tvol->pm_logical_map = %p\n", (void *)vol->pm_logical_map);
    1736           0 :         logical_map_size = _get_pm_logical_map_size(vol->params.vol_size,
    1737           0 :                            vol->params.chunk_size);
    1738           0 :         SPDK_NOTICELOG("\tlogical_map_size = 0x%" PRIx64 "\n", logical_map_size);
    1739           0 :         SPDK_NOTICELOG("\tvol->pm_chunk_maps = %p\n", (void *)vol->pm_chunk_maps);
    1740           0 :         chunk_map_size = _get_pm_total_chunks_size(vol->params.vol_size, vol->params.chunk_size,
    1741           0 :                          vol->params.backing_io_unit_size);
    1742           0 :         SPDK_NOTICELOG("\tchunk_map_size = 0x%" PRIx64 "\n", chunk_map_size);
    1743           0 : }
    1744             : 
    1745           1 : SPDK_LOG_REGISTER_COMPONENT(reduce)

Generated by: LCOV version 1.15