LCOV - code coverage report
Current view: top level - lib/blob - blobstore.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 4085 5110 79.9 %
Date: 2024-07-12 11:01:53 Functions: 337 358 94.1 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/blob.h"
      10             : #include "spdk/crc32.h"
      11             : #include "spdk/env.h"
      12             : #include "spdk/queue.h"
      13             : #include "spdk/thread.h"
      14             : #include "spdk/bit_array.h"
      15             : #include "spdk/bit_pool.h"
      16             : #include "spdk/likely.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : 
      20             : #include "spdk_internal/assert.h"
      21             : #include "spdk/log.h"
      22             : 
      23             : #include "blobstore.h"
      24             : 
      25             : #define BLOB_CRC32C_INITIAL    0xffffffffUL
      26             : 
      27             : static int bs_register_md_thread(struct spdk_blob_store *bs);
      28             : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
      29             : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
      30             : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      31             :                 uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
      32             :                 spdk_blob_op_complete cb_fn, void *cb_arg);
      33             : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      34             :                 uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      35             : 
      36             : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
      37             :                           uint16_t value_len, bool internal);
      38             : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
      39             :                                 const void **value, size_t *value_len, bool internal);
      40             : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
      41             : 
      42             : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
      43             :                                    struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      44             : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
      45             : 
      46             : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
      47             : 
      48             : /*
      49             :  * External snapshots require a channel per thread per esnap bdev.  The tree
      50             :  * is populated lazily as blob IOs are handled by the back_bs_dev. When this
      51             :  * channel is destroyed, all the channels in the tree are destroyed.
      52             :  */
      53             : 
      54             : struct blob_esnap_channel {
      55             :         RB_ENTRY(blob_esnap_channel)    node;
      56             :         spdk_blob_id                    blob_id;
      57             :         struct spdk_io_channel          *channel;
      58             : };
      59             : 
      60             : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
      61             : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
      62             :                 spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
      63             : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
      64             : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
      65       10225 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
      66             : 
      67             : static inline bool
      68       49534 : blob_is_esnap_clone(const struct spdk_blob *blob)
      69             : {
      70       49534 :         assert(blob != NULL);
      71       49534 :         return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
      72             : }
      73             : 
      74             : static int
      75        2273 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
      76             : {
      77        2273 :         assert(blob1 != NULL && blob2 != NULL);
      78        2273 :         return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
      79             : }
      80             : 
      81       15289 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
      82             : 
      83             : static void
      84       36883 : blob_verify_md_op(struct spdk_blob *blob)
      85             : {
      86       36883 :         assert(blob != NULL);
      87       36883 :         assert(spdk_get_thread() == blob->bs->md_thread);
      88       36883 :         assert(blob->state != SPDK_BLOB_STATE_LOADING);
      89       36883 : }
      90             : 
      91             : static struct spdk_blob_list *
      92        3816 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
      93             : {
      94        3816 :         struct spdk_blob_list *snapshot_entry = NULL;
      95             : 
      96        4796 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
      97        1748 :                 if (snapshot_entry->id == blobid) {
      98         768 :                         break;
      99             :                 }
     100             :         }
     101             : 
     102        3816 :         return snapshot_entry;
     103             : }
     104             : 
     105             : static void
     106        2900 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
     107             : {
     108        2900 :         assert(spdk_spin_held(&bs->used_lock));
     109        2900 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     110        2900 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
     111             : 
     112        2900 :         spdk_bit_array_set(bs->used_md_pages, page);
     113        2900 : }
     114             : 
     115             : static void
     116        2196 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
     117             : {
     118        2196 :         assert(spdk_spin_held(&bs->used_lock));
     119        2196 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     120        2196 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
     121             : 
     122        2196 :         spdk_bit_array_clear(bs->used_md_pages, page);
     123        2196 : }
     124             : 
     125             : static uint32_t
     126        8220 : bs_claim_cluster(struct spdk_blob_store *bs)
     127             : {
     128             :         uint32_t cluster_num;
     129             : 
     130        8220 :         assert(spdk_spin_held(&bs->used_lock));
     131             : 
     132        8220 :         cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
     133        8220 :         if (cluster_num == UINT32_MAX) {
     134           0 :                 return UINT32_MAX;
     135             :         }
     136             : 
     137        8220 :         SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
     138        8220 :         bs->num_free_clusters--;
     139             : 
     140        8220 :         return cluster_num;
     141             : }
     142             : 
     143             : static void
     144        2399 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
     145             : {
     146        2399 :         assert(spdk_spin_held(&bs->used_lock));
     147        2399 :         assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
     148        2399 :         assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
     149        2399 :         assert(bs->num_free_clusters < bs->total_clusters);
     150             : 
     151        2399 :         SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
     152             : 
     153        2399 :         spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
     154        2399 :         bs->num_free_clusters++;
     155        2399 : }
     156             : 
     157             : static int
     158        8220 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
     159             : {
     160        8220 :         uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
     161             : 
     162        8220 :         blob_verify_md_op(blob);
     163             : 
     164        8220 :         if (*cluster_lba != 0) {
     165           4 :                 return -EEXIST;
     166             :         }
     167             : 
     168        8216 :         *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
     169        8216 :         blob->active.num_allocated_clusters++;
     170             : 
     171        8216 :         return 0;
     172             : }
     173             : 
     174             : static int
     175        8220 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
     176             :                     uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
     177             : {
     178        8220 :         uint32_t *extent_page = 0;
     179             : 
     180        8220 :         assert(spdk_spin_held(&blob->bs->used_lock));
     181             : 
     182        8220 :         *cluster = bs_claim_cluster(blob->bs);
     183        8220 :         if (*cluster == UINT32_MAX) {
     184             :                 /* No more free clusters. Cannot satisfy the request */
     185           0 :                 return -ENOSPC;
     186             :         }
     187             : 
     188        8220 :         if (blob->use_extent_table) {
     189        4168 :                 extent_page = bs_cluster_to_extent_page(blob, cluster_num);
     190        4168 :                 if (*extent_page == 0) {
     191             :                         /* Extent page shall never occupy md_page so start the search from 1 */
     192         728 :                         if (*lowest_free_md_page == 0) {
     193         726 :                                 *lowest_free_md_page = 1;
     194             :                         }
     195             :                         /* No extent_page is allocated for the cluster */
     196         728 :                         *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
     197             :                                                *lowest_free_md_page);
     198         728 :                         if (*lowest_free_md_page == UINT32_MAX) {
     199             :                                 /* No more free md pages. Cannot satisfy the request */
     200           0 :                                 bs_release_cluster(blob->bs, *cluster);
     201           0 :                                 return -ENOSPC;
     202             :                         }
     203         728 :                         bs_claim_md_page(blob->bs, *lowest_free_md_page);
     204             :                 }
     205             :         }
     206             : 
     207        8220 :         SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
     208             :                       blob->id);
     209             : 
     210        8220 :         if (update_map) {
     211        7404 :                 blob_insert_cluster(blob, cluster_num, *cluster);
     212        7404 :                 if (blob->use_extent_table && *extent_page == 0) {
     213         644 :                         *extent_page = *lowest_free_md_page;
     214             :                 }
     215             :         }
     216             : 
     217        8220 :         return 0;
     218             : }
     219             : 
     220             : static void
     221        5570 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
     222             : {
     223        5570 :         xattrs->count = 0;
     224        5570 :         xattrs->names = NULL;
     225        5570 :         xattrs->ctx = NULL;
     226        5570 :         xattrs->get_value = NULL;
     227        5570 : }
     228             : 
     229             : void
     230        3680 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
     231             : {
     232        3680 :         if (!opts) {
     233           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     234           0 :                 return;
     235             :         }
     236             : 
     237        3680 :         if (!opts_size) {
     238           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     239           0 :                 return;
     240             :         }
     241             : 
     242        3680 :         memset(opts, 0, opts_size);
     243        3680 :         opts->opts_size = opts_size;
     244             : 
     245             : #define FIELD_OK(field) \
     246             :         offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
     247             : 
     248             : #define SET_FIELD(field, value) \
     249             :         if (FIELD_OK(field)) { \
     250             :                 opts->field = value; \
     251             :         } \
     252             : 
     253        3680 :         SET_FIELD(num_clusters, 0);
     254        3680 :         SET_FIELD(thin_provision, false);
     255        3680 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     256             : 
     257        3680 :         if (FIELD_OK(xattrs)) {
     258        3680 :                 blob_xattrs_init(&opts->xattrs);
     259             :         }
     260             : 
     261        3680 :         SET_FIELD(use_extent_table, true);
     262             : 
     263             : #undef FIELD_OK
     264             : #undef SET_FIELD
     265             : }
     266             : 
     267             : void
     268        3470 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
     269             : {
     270        3470 :         if (!opts) {
     271           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     272           0 :                 return;
     273             :         }
     274             : 
     275        3470 :         if (!opts_size) {
     276           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     277           0 :                 return;
     278             :         }
     279             : 
     280        3470 :         memset(opts, 0, opts_size);
     281        3470 :         opts->opts_size = opts_size;
     282             : 
     283             : #define FIELD_OK(field) \
     284             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
     285             : 
     286             : #define SET_FIELD(field, value) \
     287             :         if (FIELD_OK(field)) { \
     288             :                 opts->field = value; \
     289             :         } \
     290             : 
     291        3470 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     292             : 
     293             : #undef FIELD_OK
     294             : #undef SET_FILED
     295             : }
     296             : 
     297             : static struct spdk_blob *
     298        5356 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
     299             : {
     300             :         struct spdk_blob *blob;
     301             : 
     302        5356 :         blob = calloc(1, sizeof(*blob));
     303        5356 :         if (!blob) {
     304           0 :                 return NULL;
     305             :         }
     306             : 
     307        5356 :         blob->id = id;
     308        5356 :         blob->bs = bs;
     309             : 
     310        5356 :         blob->parent_id = SPDK_BLOBID_INVALID;
     311             : 
     312        5356 :         blob->state = SPDK_BLOB_STATE_DIRTY;
     313        5356 :         blob->extent_rle_found = false;
     314        5356 :         blob->extent_table_found = false;
     315        5356 :         blob->active.num_pages = 1;
     316        5356 :         blob->active.pages = calloc(1, sizeof(*blob->active.pages));
     317        5356 :         if (!blob->active.pages) {
     318           0 :                 free(blob);
     319           0 :                 return NULL;
     320             :         }
     321             : 
     322        5356 :         blob->active.pages[0] = bs_blobid_to_page(id);
     323             : 
     324        5356 :         TAILQ_INIT(&blob->xattrs);
     325        5356 :         TAILQ_INIT(&blob->xattrs_internal);
     326        5356 :         TAILQ_INIT(&blob->pending_persists);
     327        5356 :         TAILQ_INIT(&blob->persists_to_complete);
     328             : 
     329        5356 :         return blob;
     330             : }
     331             : 
     332             : static void
     333       10712 : xattrs_free(struct spdk_xattr_tailq *xattrs)
     334             : {
     335             :         struct spdk_xattr       *xattr, *xattr_tmp;
     336             : 
     337       12466 :         TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
     338        1754 :                 TAILQ_REMOVE(xattrs, xattr, link);
     339        1754 :                 free(xattr->name);
     340        1754 :                 free(xattr->value);
     341        1754 :                 free(xattr);
     342             :         }
     343       10712 : }
     344             : 
     345             : static void
     346        5356 : blob_free(struct spdk_blob *blob)
     347             : {
     348        5356 :         assert(blob != NULL);
     349        5356 :         assert(TAILQ_EMPTY(&blob->pending_persists));
     350        5356 :         assert(TAILQ_EMPTY(&blob->persists_to_complete));
     351             : 
     352        5356 :         free(blob->active.extent_pages);
     353        5356 :         free(blob->clean.extent_pages);
     354        5356 :         free(blob->active.clusters);
     355        5356 :         free(blob->clean.clusters);
     356        5356 :         free(blob->active.pages);
     357        5356 :         free(blob->clean.pages);
     358             : 
     359        5356 :         xattrs_free(&blob->xattrs);
     360        5356 :         xattrs_free(&blob->xattrs_internal);
     361             : 
     362        5356 :         if (blob->back_bs_dev) {
     363        1080 :                 blob->back_bs_dev->destroy(blob->back_bs_dev);
     364             :         }
     365             : 
     366        5356 :         free(blob);
     367        5356 : }
     368             : 
     369             : static void
     370         320 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
     371             : {
     372         320 :         struct spdk_bs_dev      *bs_dev = ctx;
     373             : 
     374         320 :         if (bserrno != 0) {
     375             :                 /*
     376             :                  * This is probably due to a memory allocation failure when creating the
     377             :                  * blob_esnap_destroy_ctx before iterating threads.
     378             :                  */
     379           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
     380             :                             blob->id, bserrno);
     381           0 :                 assert(false);
     382             :         }
     383             : 
     384         320 :         if (bs_dev == NULL) {
     385             :                 /*
     386             :                  * This check exists to make scanbuild happy.
     387             :                  *
     388             :                  * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
     389             :                  * the blobstore is being loaded. It could also be NULL if there was an error
     390             :                  * opening the esnap device. In each of these cases, no channels could have been
     391             :                  * created because back_bs_dev->create_channel() would have led to a NULL pointer
     392             :                  * deref.
     393             :                  */
     394           0 :                 assert(false);
     395             :                 return;
     396             :         }
     397             : 
     398         320 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
     399         320 :         bs_dev->destroy(bs_dev);
     400             : }
     401             : 
     402             : static void
     403         320 : blob_back_bs_destroy(struct spdk_blob *blob)
     404             : {
     405         320 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
     406             :                       blob->id);
     407             : 
     408         320 :         blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
     409         320 :                                            blob->back_bs_dev);
     410         320 :         blob->back_bs_dev = NULL;
     411         320 : }
     412             : 
     413             : struct blob_parent {
     414             :         union {
     415             :                 struct {
     416             :                         spdk_blob_id id;
     417             :                         struct spdk_blob *blob;
     418             :                 } snapshot;
     419             : 
     420             :                 struct {
     421             :                         void *id;
     422             :                         uint32_t id_len;
     423             :                         struct spdk_bs_dev *back_bs_dev;
     424             :                 } esnap;
     425             :         } u;
     426             : };
     427             : 
     428             : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent);
     429             : 
     430             : struct set_bs_dev_ctx {
     431             :         struct spdk_blob        *blob;
     432             :         struct spdk_bs_dev      *back_bs_dev;
     433             : 
     434             :         /*
     435             :          * This callback is used during a set parent operation to change the references
     436             :          * to the parent of the blob.
     437             :          */
     438             :         set_parent_refs_cb      parent_refs_cb_fn;
     439             :         struct blob_parent      *parent_refs_cb_arg;
     440             : 
     441             :         spdk_blob_op_complete   cb_fn;
     442             :         void                    *cb_arg;
     443             :         int                     bserrno;
     444             : };
     445             : 
     446             : static void
     447          28 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
     448             :                      set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
     449             :                      spdk_blob_op_complete cb_fn, void *cb_arg)
     450             : {
     451             :         struct set_bs_dev_ctx   *ctx;
     452             : 
     453          28 :         ctx = calloc(1, sizeof(*ctx));
     454          28 :         if (ctx == NULL) {
     455           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
     456             :                             blob->id);
     457           0 :                 cb_fn(cb_arg, -ENOMEM);
     458           0 :                 return;
     459             :         }
     460             : 
     461          28 :         ctx->parent_refs_cb_fn = parent_refs_cb_fn;
     462          28 :         ctx->parent_refs_cb_arg = parent_refs_cb_arg;
     463          28 :         ctx->cb_fn = cb_fn;
     464          28 :         ctx->cb_arg = cb_arg;
     465          28 :         ctx->back_bs_dev = back_bs_dev;
     466          28 :         ctx->blob = blob;
     467             : 
     468          28 :         blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx);
     469             : }
     470             : 
     471             : struct freeze_io_ctx {
     472             :         struct spdk_bs_cpl cpl;
     473             :         struct spdk_blob *blob;
     474             : };
     475             : 
     476             : static void
     477         526 : blob_io_sync(struct spdk_io_channel_iter *i)
     478             : {
     479         526 :         spdk_for_each_channel_continue(i, 0);
     480         526 : }
     481             : 
     482             : static void
     483         514 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
     484             : {
     485         514 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
     486         514 :         struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
     487         514 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     488             :         struct spdk_bs_request_set      *set;
     489             :         struct spdk_bs_user_op_args     *args;
     490             :         spdk_bs_user_op_t *op, *tmp;
     491             : 
     492         518 :         TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
     493           4 :                 set = (struct spdk_bs_request_set *)op;
     494           4 :                 args = &set->u.user_op;
     495             : 
     496           4 :                 if (args->blob == ctx->blob) {
     497           4 :                         TAILQ_REMOVE(&ch->queued_io, op, link);
     498           4 :                         bs_user_op_execute(op);
     499             :                 }
     500             :         }
     501             : 
     502         514 :         spdk_for_each_channel_continue(i, 0);
     503         514 : }
     504             : 
     505             : static void
     506        1008 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
     507             : {
     508        1008 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     509             : 
     510        1008 :         ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
     511             : 
     512        1008 :         free(ctx);
     513        1008 : }
     514             : 
     515             : static void
     516         510 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     517             : {
     518             :         struct freeze_io_ctx *ctx;
     519             : 
     520         510 :         blob_verify_md_op(blob);
     521             : 
     522         510 :         ctx = calloc(1, sizeof(*ctx));
     523         510 :         if (!ctx) {
     524           0 :                 cb_fn(cb_arg, -ENOMEM);
     525           0 :                 return;
     526             :         }
     527             : 
     528         510 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     529         510 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     530         510 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     531         510 :         ctx->blob = blob;
     532             : 
     533             :         /* Freeze I/O on blob */
     534         510 :         blob->frozen_refcnt++;
     535             : 
     536         510 :         spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
     537             : }
     538             : 
     539             : static void
     540         498 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     541             : {
     542             :         struct freeze_io_ctx *ctx;
     543             : 
     544         498 :         blob_verify_md_op(blob);
     545             : 
     546         498 :         ctx = calloc(1, sizeof(*ctx));
     547         498 :         if (!ctx) {
     548           0 :                 cb_fn(cb_arg, -ENOMEM);
     549           0 :                 return;
     550             :         }
     551             : 
     552         498 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     553         498 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     554         498 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     555         498 :         ctx->blob = blob;
     556             : 
     557         498 :         assert(blob->frozen_refcnt > 0);
     558             : 
     559         498 :         blob->frozen_refcnt--;
     560             : 
     561         498 :         spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
     562             : }
     563             : /* Snapshot the blob's active metadata as its clean metadata.
                     :  *
                     :  * Duplicates of active.extent_pages, active.clusters and active.pages are
                     :  * allocated first (only for arrays whose element count is non-zero). Once
                     :  * all of them succeed, the previous clean arrays are freed, the current
                     :  * active arrays are handed over to blob->clean, and the duplicates become
                     :  * the new active arrays.
                     :  *
                     :  * Returns 0 on success, or -ENOMEM if a duplicate cannot be allocated; in
                     :  * that case nothing in the blob has been modified.
                     :  */
     564             : static int
     565        8442 : blob_mark_clean(struct spdk_blob *blob)
     566             : {
     567        8442 :         uint32_t *extent_pages = NULL;
     568        8442 :         uint64_t *clusters = NULL;
     569        8442 :         uint32_t *pages = NULL;
     570             : 
     571        8442 :         assert(blob != NULL);
     572             : 
     573        8442 :         if (blob->active.num_extent_pages) {
     574        2845 :                 assert(blob->active.extent_pages);
     575        2845 :                 extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
     576        2845 :                 if (!extent_pages) {
     577           0 :                         return -ENOMEM;
     578             :                 }
     579        2845 :                 memcpy(extent_pages, blob->active.extent_pages,
     580        2845 :                        blob->active.num_extent_pages * sizeof(*extent_pages));
     581             :         }
     582             : 
     583        8442 :         if (blob->active.num_clusters) {
     584        5918 :                 assert(blob->active.clusters);
     585        5918 :                 clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
     586        5918 :                 if (!clusters) {
     587           0 :                         free(extent_pages);
     588           0 :                         return -ENOMEM;
     589             :                 }
     590        5918 :                 memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
     591             :         }
     592             : 
     593        8442 :         if (blob->active.num_pages) {
     594        6958 :                 assert(blob->active.pages);
     595        6958 :                 pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
     596        6958 :                 if (!pages) {
     597           0 :                         free(extent_pages);
     598           0 :                         free(clusters);
     599           0 :                         return -ENOMEM;
     600             :                 }
     601        6958 :                 memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
     602             :         }
     603             :         /* Every allocation succeeded; nothing below this point can fail. */
     604        8442 :         free(blob->clean.extent_pages);
     605        8442 :         free(blob->clean.clusters);
     606        8442 :         free(blob->clean.pages);
     607             :         /* The arrays active pointed to so far become the clean copy (pointers move, no data is copied). */
     608        8442 :         blob->clean.num_extent_pages = blob->active.num_extent_pages;
     609        8442 :         blob->clean.extent_pages = blob->active.extent_pages;
     610        8442 :         blob->clean.num_clusters = blob->active.num_clusters;
     611        8442 :         blob->clean.clusters = blob->active.clusters;
     612        8442 :         blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
     613        8442 :         blob->clean.num_pages = blob->active.num_pages;
     614        8442 :         blob->clean.pages = blob->active.pages;
     615             :         /* active continues with the private duplicates; a pointer is NULL when its count is 0.
                     :          * NOTE(review): the duplicates hold exactly num_* elements, but
                     :          * active.cluster_array_size / active.extent_pages_array_size are not
                     :          * updated here - presumably they already equal the counts; confirm.
                     :          */
     616        8442 :         blob->active.extent_pages = extent_pages;
     617        8442 :         blob->active.clusters = clusters;
     618        8442 :         blob->active.pages = pages;
     619             : 
     620             :         /* If the metadata was dirtied again while the metadata was being written to disk,
     621             :          *  we do not want to revert the DIRTY state back to CLEAN here.
     622             :          */
     623        8442 :         if (blob->state == SPDK_BLOB_STATE_LOADING) {
     624        3402 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
     625             :         }
     626             : 
     627        8442 :         return 0;
     628             : }
     629             : /* Build an in-memory xattr from an on-disk xattr descriptor and append it to the blob.
                     :  *
                     :  * blob       - blob that receives the new xattr.
                     :  * desc_xattr - on-disk descriptor; the name bytes are followed directly by the
                     :  *              value bytes.
                     :  * internal   - true appends to blob->xattrs_internal, false to blob->xattrs.
                     :  *
                     :  * Returns 0 on success, -EINVAL if desc_xattr->length is not the size of the
                     :  * two length fields plus name_length plus value_length, or -ENOMEM if an
                     :  * allocation fails (nothing is added to the blob in either error case).
                     :  *
                     :  * NOTE(review): only the internal consistency of the descriptor is checked
                     :  * here; whether it fits inside the metadata page is not - confirm the caller
                     :  * guarantees that.
                     :  */
     630             : static int
     631        1276 : blob_deserialize_xattr(struct spdk_blob *blob,
     632             :                        struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
     633             : {
     634             :         struct spdk_xattr                       *xattr;
     635             : 
     636        1276 :         if (desc_xattr->length != sizeof(desc_xattr->name_length) +
     637             :             sizeof(desc_xattr->value_length) +
     638        1276 :             desc_xattr->name_length + desc_xattr->value_length) {
     639           0 :                 return -EINVAL;
     640             :         }
     641             : 
     642        1276 :         xattr = calloc(1, sizeof(*xattr));
     643        1276 :         if (xattr == NULL) {
     644           0 :                 return -ENOMEM;
     645             :         }
     646             :         /* One extra byte for the NUL terminator that is appended after the copy below. */
     647        1276 :         xattr->name = malloc(desc_xattr->name_length + 1);
     648        1276 :         if (xattr->name == NULL) {
     649           0 :                 free(xattr);
     650           0 :                 return -ENOMEM;
     651             :         }
     652             :         /* NOTE(review): malloc(0) may return NULL, so a zero value_length could be reported as -ENOMEM - confirm whether empty values can occur. */
     653        1276 :         xattr->value = malloc(desc_xattr->value_length);
     654        1276 :         if (xattr->value == NULL) {
     655           0 :                 free(xattr->name);
     656           0 :                 free(xattr);
     657           0 :                 return -ENOMEM;
     658             :         }
     659             : 
     660        1276 :         memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
     661        1276 :         xattr->name[desc_xattr->name_length] = '\0';
     662        1276 :         xattr->value_len = desc_xattr->value_length;
     663        1276 :         memcpy(xattr->value,
     664        1276 :                (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
     665        1276 :                desc_xattr->value_length);
     666             : 
     667        1276 :         TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
     668             : 
     669        1276 :         return 0;
     670             : }
     671             : 
     672             : /* Walk the descriptors stored in one metadata page and apply each of them to the blob.
                     :  *
                     :  * Handled descriptor types: PADDING, FLAGS, EXTENT_RLE, EXTENT_TABLE,
                     :  * EXTENT_PAGE, XATTR and XATTR_INTERNAL. Any other type is skipped.
                     :  * The walk ends at a zero-length PADDING descriptor, or when no further
                     :  * descriptor header fits into page->descriptors.
                     :  *
                     :  * Returns 0 on success, -EINVAL for a malformed or inconsistent descriptor,
                     :  * -ENOMEM if an array cannot be grown, or the error returned by
                     :  * blob_deserialize_xattr(). Changes already applied to the blob are not
                     :  * rolled back when an error is returned.
                     :  */
     673             : static int
     674        4580 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
     675             : {
     676             :         struct spdk_blob_md_descriptor *desc;
     677        4580 :         size_t  cur_desc = 0;
     678             :         void *tmp;
     679             : 
     680        4580 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
     681       13444 :         while (cur_desc < sizeof(page->descriptors)) {
     682       13444 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
     683        4532 :                         if (desc->length == 0) {
     684             :                                 /* If padding and length are 0, this terminates the page */
     685        4532 :                                 break;
     686             :                         }
     687        8912 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
     688             :                         struct spdk_blob_md_descriptor_flags    *desc_flags;
     689             : 
     690        3434 :                         desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
     691             :                         /* The payload must be exactly the flags structure without the generic descriptor header. */
     692        3434 :                         if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
     693           0 :                                 return -EINVAL;
     694             :                         }
     695             :                         /* Any bit outside SPDK_BLOB_INVALID_FLAGS_MASK makes the page unparsable (-EINVAL). */
     696        3434 :                         if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
     697             :                             SPDK_BLOB_INVALID_FLAGS_MASK) {
     698           8 :                                 return -EINVAL;
     699             :                         }
     700             :                         /* Bits outside the known data_ro mask force both data and metadata read-only. */
     701        3426 :                         if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
     702             :                             SPDK_BLOB_DATA_RO_FLAGS_MASK) {
     703          12 :                                 blob->data_ro = true;
     704          12 :                                 blob->md_ro = true;
     705             :                         }
     706             :                         /* Bits outside the known md_ro mask force the metadata read-only. */
     707        3426 :                         if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
     708             :                             SPDK_BLOB_MD_RO_FLAGS_MASK) {
     709          12 :                                 blob->md_ro = true;
     710             :                         }
     711             :                         /* An explicit SPDK_BLOB_READ_ONLY flag also makes data and metadata read-only. */
     712        3426 :                         if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
     713         562 :                                 blob->data_ro = true;
     714         562 :                                 blob->md_ro = true;
     715             :                         }
     716             :                         /* Keep the raw flag words on the blob, unknown bits included. */
     717        3426 :                         blob->invalid_flags = desc_flags->invalid_flags;
     718        3426 :                         blob->data_ro_flags = desc_flags->data_ro_flags;
     719        3426 :                         blob->md_ro_flags = desc_flags->md_ro_flags;
     720             : 
     721        5478 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
     722             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
     723             :                         unsigned int                            i, j;
     724        1392 :                         unsigned int                            cluster_count = blob->active.num_clusters;
     725             : 
     726        1392 :                         if (blob->extent_table_found) {
     727             :                                 /* Extent Table already present in the md,
     728             :                                  * both descriptors should never be at the same time. */
     729           0 :                                 return -EINVAL;
     730             :                         }
     731        1392 :                         blob->extent_rle_found = true;
     732             : 
     733        1392 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
     734             : 
     735        1392 :                         if (desc_extent_rle->length == 0 ||
     736        1392 :                             (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
     737           0 :                                 return -EINVAL;
     738             :                         }
     739             :                         /* First pass: check that every referenced cluster is marked allocated and count the entries, so the array can be sized before anything is stored. */
     740        2962 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     741       21238 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     742       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     743        6692 :                                                 if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
     744        6692 :                                                                                 desc_extent_rle->extents[i].cluster_idx + j)) {
     745           0 :                                                         return -EINVAL;
     746             :                                                 }
     747             :                                         }
     748       19668 :                                         cluster_count++;
     749             :                                 }
     750             :                         }
     751             : 
     752        1392 :                         if (cluster_count == 0) {
     753           0 :                                 return -EINVAL;
     754             :                         }
     755        1392 :                         tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
     756        1392 :                         if (tmp == NULL) {
     757           0 :                                 return -ENOMEM;
     758             :                         }
     759        1392 :                         blob->active.clusters = tmp;
     760        1392 :                         blob->active.cluster_array_size = cluster_count;
     761             :                         /* Second pass: expand every run into one LBA per cluster; a run with cluster_idx 0 is unallocated and only accepted for thin-provisioned blobs. */
     762        2962 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     763       21238 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     764       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     765       13384 :                                                 blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     766        6692 :                                                                 desc_extent_rle->extents[i].cluster_idx + j);
     767        6692 :                                                 blob->active.num_allocated_clusters++;
     768       12976 :                                         } else if (spdk_blob_is_thin_provisioned(blob)) {
     769       12976 :                                                 blob->active.clusters[blob->active.num_clusters++] = 0;
     770             :                                         } else {
     771           0 :                                                 return -EINVAL;
     772             :                                         }
     773             :                                 }
     774             :                         }
     775        4086 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
     776             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
     777        1764 :                         uint32_t num_extent_pages = blob->active.num_extent_pages;
     778             :                         uint32_t i, j;
     779             :                         size_t extent_pages_length;
     780             : 
     781        1764 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
     782        1764 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
     783             :                         /* NOTE(review): extent_pages_length is computed before length is validated; a
                     :                          * non-zero length smaller than sizeof(num_clusters) wraps around and is
                     :                          * rejected further down only if the modulo check happens to fail -
                     :                          * confirm against the descriptor layout.
                     :                          */
     784        1764 :                         if (blob->extent_rle_found) {
     785             :                                 /* This means that Extent RLE is present in MD,
     786             :                                  * both should never be at the same time. */
     787           0 :                                 return -EINVAL;
     788        1764 :                         } else if (blob->extent_table_found &&
     789           0 :                                    desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
     790             :                                 /* Number of clusters in this ET does not match number
     791             :                                  * from previously read EXTENT_TABLE. */
     792           0 :                                 return -EINVAL;
     793             :                         }
     794             : 
     795        1764 :                         if (desc_extent_table->length == 0 ||
     796        1764 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
     797           0 :                                 return -EINVAL;
     798             :                         }
     799             : 
     800        1764 :                         blob->extent_table_found = true;
     801             : 
     802        3238 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     803        1474 :                                 num_extent_pages += desc_extent_table->extent_page[i].num_pages;
     804             :                         }
     805             :                         /* Grow the array to the new total; when the total is 0 the pointer is left as it is. */
     806        1764 :                         if (num_extent_pages > 0) {
     807        1458 :                                 tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
     808        1458 :                                 if (tmp == NULL) {
     809           0 :                                         return -ENOMEM;
     810             :                                 }
     811        1458 :                                 blob->active.extent_pages = tmp;
     812             :                         }
     813        1764 :                         blob->active.extent_pages_array_size = num_extent_pages;
     814             : 
     815        1764 :                         blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
     816             :                         /* NOTE(review): the array above is sized from the sum of num_pages, while an
                     :                          * entry with page_idx != 0 always stores one element and its num_pages == 1
                     :                          * expectation is enforced by assert() only. With asserts compiled out, an
                     :                          * allocated entry with num_pages == 0 would store past the sized array -
                     :                          * confirm whether on-disk contents can be trusted at this point.
                     :                          */
     817             :                         /* Extent table entries contain md page numbers for extent pages.
     818             :                          * Zeroes represent unallocated extent pages, those are run-length-encoded.
     819             :                          */
     820        3238 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     821        1474 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
     822        1052 :                                         assert(desc_extent_table->extent_page[i].num_pages == 1);
     823        1052 :                                         blob->active.extent_pages[blob->active.num_extent_pages++] =
     824        1052 :                                                 desc_extent_table->extent_page[i].page_idx;
     825         422 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     826         844 :                                         for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
     827         422 :                                                 blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
     828             :                                         }
     829             :                                 } else {
     830           0 :                                         return -EINVAL;
     831             :                                 }
     832             :                         }
     833        2322 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
     834             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
     835             :                         unsigned int                                    i;
     836        1046 :                         unsigned int                                    cluster_count = 0;
     837             :                         size_t                                          cluster_idx_length;
     838             : 
     839        1046 :                         if (blob->extent_rle_found) {
     840             :                                 /* This means that Extent RLE is present in MD,
     841             :                                  * both should never be at the same time. */
     842           0 :                                 return -EINVAL;
     843             :                         }
     844             : 
     845        1046 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
     846        1046 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
     847             :                         /* length has to cover start_cluster_idx plus a whole number (at least one) of cluster indexes. */
     848        1046 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
     849        1046 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
     850           0 :                                 return -EINVAL;
     851             :                         }
     852             :                         /* First pass: every non-zero cluster index must be marked allocated in the blobstore; count the entries. */
     853       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     854       15298 :                                 if (desc_extent->cluster_idx[i] != 0) {
     855        6962 :                                         if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
     856           0 :                                                 return -EINVAL;
     857             :                                         }
     858             :                                 }
     859       15298 :                                 cluster_count++;
     860             :                         }
     861             : 
     862        1046 :                         if (cluster_count == 0) {
     863           0 :                                 return -EINVAL;
     864             :                         }
     865             : 
     866             :                         /* When reading extent pages sequentially starting cluster idx should match
     867             :                          * current size of a blob.
     868             :                          * If changed to batch reading, this check shall be removed. */
     869        1046 :                         if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
     870           0 :                                 return -EINVAL;
     871             :                         }
     872             : 
     873        1046 :                         tmp = realloc(blob->active.clusters,
     874        1046 :                                       (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
     875        1046 :                         if (tmp == NULL) {
     876           0 :                                 return -ENOMEM;
     877             :                         }
     878        1046 :                         blob->active.clusters = tmp;
     879        1046 :                         blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
     880             :                         /* Second pass: append one LBA per entry; index 0 means unallocated and is only accepted for thin-provisioned blobs. */
     881       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     882       15298 :                                 if (desc_extent->cluster_idx[i] != 0) {
     883        6962 :                                         blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     884             :                                                         desc_extent->cluster_idx[i]);
     885        6962 :                                         blob->active.num_allocated_clusters++;
     886        8336 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     887        8336 :                                         blob->active.clusters[blob->active.num_clusters++] = 0;
     888             :                                 } else {
     889           0 :                                         return -EINVAL;
     890             :                                 }
     891             :                         }
     892        1046 :                         assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
     893        1046 :                         assert(blob->remaining_clusters_in_et >= cluster_count); /* NOTE(review): assert-only guard; the subtraction below is unchecked when asserts are compiled out */
     894        1046 :                         blob->remaining_clusters_in_et -= cluster_count;
     895        1276 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
     896             :                         int rc;
     897             : 
     898         394 :                         rc = blob_deserialize_xattr(blob,
     899             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, false);
     900         394 :                         if (rc != 0) {
     901           0 :                                 return rc;
     902             :                         }
     903         882 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
     904             :                         int rc;
     905             : 
     906         882 :                         rc = blob_deserialize_xattr(blob,
     907             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, true);
     908         882 :                         if (rc != 0) {
     909           0 :                                 return rc;
     910             :                         }
     911             :                 } else {
     912             :                         /* Unrecognized descriptor type.  Do not fail - just continue to the
     913             :                          *  next descriptor.  If this descriptor is associated with some feature
     914             :                          *  defined in a newer version of blobstore, that version of blobstore
     915             :                          *  should create and set an associated feature flag to specify if this
     916             :                          *  blob can be loaded or not.
     917             :                          */
     918             :                 }
     919             :                 /* NOTE(review): desc->length is never compared with the space left in
                     :                  * page->descriptors before the branches above read the payload; only the
                     :                  * position of the next header is bounded below. Presumably the page has
                     :                  * been validated by the caller - TODO confirm.
                     :                  */
     920             :                 /* Advance to the next descriptor */
     921        8904 :                 cur_desc += sizeof(*desc) + desc->length;
     922        8904 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
     923          40 :                         break;
     924             :                 }
     925        8864 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
     926             :         }
     927             : 
     928        4572 :         return 0;
     929             : }
     930             : 
     931             : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
     932             : /* Parse a separately read extent page into a blob that is still loading.
                     :  *
                     :  * Returns -ENOENT when bs_load_cur_extent_page_valid() rejects the page,
                     :  * otherwise whatever blob_parse_page() returns for it.
                     :  */
     933             : static int
     934        1046 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
     935             : {
     936        1046 :         assert(blob != NULL);
     937        1046 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     938             : 
     939        1046 :         if (bs_load_cur_extent_page_valid(extent_page) == false) {
     940           0 :                 return -ENOENT;
     941             :         }
     942             : 
     943        1046 :         return blob_parse_page(extent_page, blob);
     944             : }
     945             : /* Rebuild a blob's in-memory metadata from its chain of metadata pages.
                     :  *
                     :  * pages      - array of page_count pages; pages[0] must have sequence_num 0.
                     :  * page_count - number of pages, at least 1.
                     :  * blob       - blob in SPDK_BLOB_STATE_LOADING with no active cluster array yet.
                     :  *
                     :  * The page list is recorded in blob->active.pages first, then every page is
                     :  * handed to blob_parse_page() in order.
                     :  *
                     :  * Returns 0 on success, -ENOENT if blob->id differs from pages[0].id,
                     :  * -ENOMEM if the page list cannot be allocated, or the first non-zero result
                     :  * of blob_parse_page().
                     :  */
     946             : static int
     947        3438 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
     948             :            struct spdk_blob *blob)
     949             : {
     950             :         const struct spdk_blob_md_page *page;
     951             :         uint32_t i;
     952             :         int rc;
     953             :         void *tmp;
     954             : 
     955        3438 :         assert(page_count > 0);
     956        3438 :         assert(pages[0].sequence_num == 0);
     957        3438 :         assert(blob != NULL);
     958        3438 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     959        3438 :         assert(blob->active.clusters == NULL);
     960             : 
     961             :         /* The blobid provided doesn't match what's in the MD, this can
     962             :          * happen for example if a bogus blobid is passed in through open.
     963             :          */
     964        3438 :         if (blob->id != pages[0].id) {
     965           4 :                 SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
     966             :                             "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
     967           4 :                 return -ENOENT;
     968             :         }
     969             : 
     970        3434 :         tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
     971        3434 :         if (!tmp) {
     972           0 :                 return -ENOMEM;
     973             :         }
     974        3434 :         blob->active.pages = tmp;
     975             :         /* NOTE(review): the first entry is taken from the 64-bit pages[0].id rather than from a page index; presumably the id encodes the index of the first page - confirm. */
     976        3434 :         blob->active.pages[0] = pages[0].id;
     977             :         /* Every following entry is the 'next' link of the preceding page (validity is checked by assert() only). */
     978        3534 :         for (i = 1; i < page_count; i++) {
     979         100 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
     980         100 :                 blob->active.pages[i] = pages[i - 1].next;
     981             :         }
     982        3434 :         blob->active.num_pages = page_count;
     983             : 
     984        6960 :         for (i = 0; i < page_count; i++) {
     985        3534 :                 page = &pages[i];
     986             : 
     987        3534 :                 assert(page->id == blob->id);
     988        3534 :                 assert(page->sequence_num == i);
     989             : 
     990        3534 :                 rc = blob_parse_page(page, blob);
     991        3534 :                 if (rc != 0) {
     992           8 :                         return rc;
     993             :                 }
     994             :         }
     995             : 
     996        3426 :         return 0;
     997             : }
     998             : /* Append one zeroed metadata page to the page array that is being serialized.
                     :  *
                     :  * blob       - supplies the id written into the new page.
                     :  * pages      - in/out: page array; must be NULL while *page_count is 0.
                     :  * page_count - in/out: number of pages in *pages, incremented on success.
                     :  * last_page  - out: the new page, or NULL when the function fails.
                     :  *
                     :  * The first page is obtained with spdk_malloc(); later pages grow the array
                     :  * with spdk_realloc(), so *pages may change. The new page gets the blob id,
                     :  * its position as sequence_num and SPDK_INVALID_MD_PAGE as next link.
                     :  *
                     :  * Returns 0 on success or -ENOMEM; on failure *pages and *page_count are
                     :  * left unchanged.
                     :  */
     999             : static int
    1000        4350 : blob_serialize_add_page(const struct spdk_blob *blob,
    1001             :                         struct spdk_blob_md_page **pages,
    1002             :                         uint32_t *page_count,
    1003             :                         struct spdk_blob_md_page **last_page)
    1004             : {
    1005             :         struct spdk_blob_md_page *page, *tmp_pages;
    1006             : 
    1007        4350 :         assert(pages != NULL);
    1008        4350 :         assert(page_count != NULL);
    1009             : 
    1010        4350 :         *last_page = NULL;
    1011        4350 :         if (*page_count == 0) {
    1012        4262 :                 assert(*pages == NULL);
    1013        4262 :                 *pages = spdk_malloc(SPDK_BS_PAGE_SIZE, 0,
    1014             :                                      NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    1015        4262 :                 if (*pages == NULL) {
    1016           0 :                         return -ENOMEM;
    1017             :                 }
    1018        4262 :                 *page_count = 1;
    1019             :         } else {
    1020          88 :                 assert(*pages != NULL);
    1021          88 :                 tmp_pages = spdk_realloc(*pages, SPDK_BS_PAGE_SIZE * (*page_count + 1), 0);
    1022          88 :                 if (tmp_pages == NULL) {
    1023           0 :                         return -ENOMEM;
    1024             :                 }
    1025          88 :                 (*page_count)++;
    1026          88 :                 *pages = tmp_pages;
    1027             :         }
    1028             :         /* The new page is the last element of the (possibly moved) array. */
    1029        4350 :         page = &(*pages)[*page_count - 1];
    1030        4350 :         memset(page, 0, sizeof(*page));
    1031        4350 :         page->id = blob->id;
    1032        4350 :         page->sequence_num = *page_count - 1;
    1033        4350 :         page->next = SPDK_INVALID_MD_PAGE;
    1034        4350 :         *last_page = page;
    1035             : 
    1036        4350 :         return 0;
    1037             : }
    1038             : 
    1039             : /* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor.
    1040             :  * *required_sz is updated on both success and failure.
    1041             :  *
                     :  * xattr       - attribute to serialize; its name is a NUL-terminated string.
                     :  * buf, buf_sz - destination buffer and the number of bytes available in it.
                     :  * required_sz - out: size of the descriptor structure plus the name length
                     :  *               plus the value length.
                     :  * internal    - selects SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL instead of
                     :  *               SPDK_MD_DESCRIPTOR_TYPE_XATTR as the descriptor type.
                     :  *
                     :  * Returns 0 on success, or -1 (not an errno value) when buf_sz is smaller
                     :  * than *required_sz; nothing is written to buf in that case.
    1042             :  */
    1043             : static int
    1044        1771 : blob_serialize_xattr(const struct spdk_xattr *xattr,
    1045             :                      uint8_t *buf, size_t buf_sz,
    1046             :                      size_t *required_sz, bool internal)
    1047             : {
    1048             :         struct spdk_blob_md_descriptor_xattr    *desc;
    1049             : 
    1050        1771 :         *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
    1051        1771 :                        strlen(xattr->name) +
    1052        1771 :                        xattr->value_len;
    1053             : 
    1054        1771 :         if (buf_sz < *required_sz) {
    1055          48 :                 return -1;
    1056             :         }
    1057             : 
    1058        1723 :         desc = (struct spdk_blob_md_descriptor_xattr *)buf;
    1059             :         /* desc->length covers only what follows the generic header: both length fields, the name and the value. */
    1060        1723 :         desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
    1061        1723 :         desc->length = sizeof(desc->name_length) +
    1062             :                        sizeof(desc->value_length) +
    1063        1723 :                        strlen(xattr->name) +
    1064        1723 :                        xattr->value_len;
    1065        1723 :         desc->name_length = strlen(xattr->name); /* NOTE(review): may narrow if name_length is smaller than size_t - confirm the field width */
    1066        1723 :         desc->value_length = xattr->value_len;
    1067             :         /* The name is stored without its NUL terminator; the value bytes follow it directly. */
    1068        1723 :         memcpy(desc->name, xattr->name, desc->name_length);
    1069        1723 :         memcpy((void *)((uintptr_t)desc->name + desc->name_length),
    1070        1723 :                xattr->value,
    1071        1723 :                desc->value_length);
    1072             : 
    1073        1723 :         return 0;
    1074             : }
    1075             : /*
                     :  * Serialize one EXTENT_TABLE descriptor at *buf, covering the blob's active extent
                     :  * pages starting at index start_ep.
                     :  *
                     :  * If *remaining_sz cannot hold the descriptor header plus the num_clusters field,
                     :  * nothing is written and *next_ep is set to start_ep.
                     :  *
                     :  * Otherwise entries are emitted until either all extent pages are covered or there
                     :  * is no room for one more entry. A run of consecutive zero (unallocated) extent
                     :  * pages is collapsed into a single entry with page_idx 0 and num_pages equal to the
                     :  * run length; every non-zero extent page gets its own entry with num_pages 1.
                     :  *
                     :  * On return *next_ep is the index of the first extent page not serialized (equal to
                     :  * blob->active.num_extent_pages when everything fit), and *buf / *remaining_sz have
                     :  * been moved past the descriptor that was written.
                     :  */
    1076             : static void
    1077        1685 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
    1078             :                                   uint64_t start_ep, uint64_t *next_ep,
    1079             :                                   uint8_t **buf, size_t *remaining_sz)
    1080             : {
    1081             :         struct spdk_blob_md_descriptor_extent_table *desc;
    1082             :         size_t cur_sz;
    1083             :         uint64_t i, et_idx;
    1084             :         uint32_t extent_page, ep_len;
    1085             : 
    1086             :         /* The buffer must have room for at least num_clusters entry */
    1087        1685 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
    1088        1685 :         if (*remaining_sz < cur_sz) {
    1089          20 :                 *next_ep = start_ep; /* no progress; caller has to supply a fresh buffer */
    1090          20 :                 return;
    1091             :         }
    1092             : 
    1093        1665 :         desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
    1094        1665 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
    1095             : 
    1096        1665 :         desc->num_clusters = blob->active.num_clusters;
    1097             : 
    1098        1665 :         ep_len = 1; /* length of the run that the next emitted entry will describe */
    1099        1665 :         et_idx = 0; /* number of entries written into desc->extent_page[] so far */
    1100        4236 :         for (i = start_ep; i < blob->active.num_extent_pages; i++) {
    1101        2571 :                 if (*remaining_sz < cur_sz  + sizeof(desc->extent_page[0])) {
    1102             :                         /* If we ran out of buffer space, return */
    1103           0 :                         break;
    1104             :                 }
    1105             : 
    1106        2571 :                 extent_page = blob->active.extent_pages[i];
    1107             :                 /* Verify that next extent_page is unallocated */
    1108        2571 :                 if (extent_page == 0 &&
    1109        1518 :                     (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
    1110        1078 :                         ep_len++; /* still inside a run of unallocated pages; extend it */
    1111        1078 :                         continue;
    1112             :                 }
    1113        1493 :                 desc->extent_page[et_idx].page_idx = extent_page;
    1114        1493 :                 desc->extent_page[et_idx].num_pages = ep_len;
    1115        1493 :                 et_idx++;
    1116             : 
    1117        1493 :                 ep_len = 1;
    1118        1493 :                 cur_sz += sizeof(desc->extent_page[et_idx]);
    1119             :         }
    1120        1665 :         *next_ep = i;
    1121             : 
    1122        1665 :         desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
    1123        1665 :         *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1124        1665 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1125             : }
    1126             : /*
                     :  * Serialize the blob's whole extent table, spilling onto additional metadata pages
                     :  * as needed.
                     :  *
                     :  * blob_serialize_extent_table_entry() is called repeatedly; whenever it stops before
                     :  * reaching blob->active.num_extent_pages, a new page is appended with
                     :  * blob_serialize_add_page() and *buf / *remaining_sz are reset to that page's
                     :  * descriptor area.
                     :  *
                     :  * cur_page is received by value, so the page pointer refreshed here is not visible
                     :  * to the caller; only *pages, *page_count, *buf and *remaining_sz are updated.
                     :  *
                     :  * Returns 0 on success or the negative value returned by blob_serialize_add_page().
                     :  * NOTE(review): on failure *pages is left untouched here, whereas
                     :  * blob_serialize_xattrs() frees it - confirm that the caller releases it.
                     :  */
    1127             : static int
    1128        1667 : blob_serialize_extent_table(const struct spdk_blob *blob,
    1129             :                             struct spdk_blob_md_page **pages,
    1130             :                             struct spdk_blob_md_page *cur_page,
    1131             :                             uint32_t *page_count, uint8_t **buf,
    1132             :                             size_t *remaining_sz)
    1133             : {
    1134        1667 :         uint64_t                                last_extent_page;
    1135             :         int                                     rc;
    1136             : 
    1137        1667 :         last_extent_page = 0;
    1138             :         /* At least single extent table entry has to be always persisted.
    1139             :          * Such case occurs with num_extent_pages == 0. */
    1140        1685 :         while (last_extent_page <= blob->active.num_extent_pages) {
    1141        1685 :                 blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
    1142             :                                                   remaining_sz);
    1143             : 
    1144        1685 :                 if (last_extent_page == blob->active.num_extent_pages) {
    1145        1667 :                         break;
    1146             :                 }
    1147             : 
    1148          18 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1149          18 :                 if (rc < 0) {
    1150           0 :                         return rc;
    1151             :                 }
    1152             : 
    1153          18 :                 *buf = (uint8_t *)cur_page->descriptors; /* continue at the start of the new page */
    1154          18 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1155             :         }
    1156             : 
    1157        1667 :         return 0;
    1158             : }
    1159             : /*
                     :  * Serialize one EXTENT_RLE descriptor at *buf, run-length encoding the blob's active
                     :  * clusters starting at index start_cluster.
                     :  *
                     :  * Each extent stores cluster_idx = lba / lba_per_cluster and
                     :  * length = lba_count / lba_per_cluster. A run is extended while the next cluster's
                     :  * LBA directly follows the current run (non-zero LBAs), or while both the run and
                     :  * the next cluster are zero (unallocated).
                     :  *
                     :  * If *buf_sz cannot hold the descriptor header plus one extent, nothing is written
                     :  * and *next_cluster is set to start_cluster. If space runs out part-way,
                     :  * *next_cluster is the index of the first cluster that still has to be encoded.
                     :  * Otherwise the final run is written and *next_cluster is
                     :  * blob->active.num_clusters.
                     :  *
                     :  * *buf and *buf_sz are moved past the descriptor that was written.
                     :  */
    1160             : static void
    1161        1737 : blob_serialize_extent_rle(const struct spdk_blob *blob,
    1162             :                           uint64_t start_cluster, uint64_t *next_cluster,
    1163             :                           uint8_t **buf, size_t *buf_sz)
    1164             : {
    1165             :         struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
    1166             :         size_t cur_sz;
    1167             :         uint64_t i, extent_idx;
    1168             :         uint64_t lba, lba_per_cluster, lba_count;
    1169             : 
    1170             :         /* The buffer must have room for at least one extent */
    1171        1737 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
    1172        1737 :         if (*buf_sz < cur_sz) {
    1173          18 :                 *next_cluster = start_cluster; /* no progress; caller has to supply a fresh buffer */
    1174          18 :                 return;
    1175             :         }
    1176             : 
    1177        1719 :         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
    1178        1719 :         desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
    1179             : 
    1180        1719 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1181             :         /* Assert for scan-build false positive */
    1182        1719 :         assert(lba_per_cluster > 0);
    1183             : 
    1184        1719 :         lba = blob->active.clusters[start_cluster]; /* first LBA of the run being built */
    1185        1719 :         lba_count = lba_per_cluster; /* size of the current run, in LBAs */
    1186        1719 :         extent_idx = 0;
    1187      810450 :         for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
    1188      808735 :                 if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
    1189             :                         /* Run-length encode sequential non-zero LBA */
    1190        7276 :                         lba_count += lba_per_cluster;
    1191        7276 :                         continue;
    1192      801459 :                 } else if (lba == 0 && blob->active.clusters[i] == 0) {
    1193             :                         /* Run-length encode unallocated clusters */
    1194      800266 :                         lba_count += lba_per_cluster;
    1195      800266 :                         continue;
    1196             :                 }
    1197        1193 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1198        1193 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1199        1193 :                 extent_idx++;
    1200             : 
    1201        1193 :                 cur_sz += sizeof(desc_extent_rle->extents[extent_idx]); /* reserve room for the next run */
    1202             : 
    1203        1193 :                 if (*buf_sz < cur_sz) {
    1204             :                         /* If we ran out of buffer space, return */
    1205           4 :                         *next_cluster = i;
    1206           4 :                         break;
    1207             :                 }
    1208             : 
    1209        1189 :                 lba = blob->active.clusters[i];
    1210        1189 :                 lba_count = lba_per_cluster;
    1211             :         }
    1212             : 
    1213        1719 :         if (*buf_sz >= cur_sz) { /* loop ended with room left: flush the final run */
    1214        1715 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1215        1715 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1216        1715 :                 extent_idx++;
    1217             : 
    1218        1715 :                 *next_cluster = blob->active.num_clusters;
    1219             :         }
    1220             : 
    1221        1719 :         desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
    1222        1719 :         *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1223        1719 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1224             : }
    1225             : /*
                     :  * Serialize all of the blob's active clusters as EXTENT_RLE descriptors, spilling
                     :  * onto additional metadata pages as needed.
                     :  *
                     :  * blob_serialize_extent_rle() is called until it reports
                     :  * blob->active.num_clusters as the next cluster; each time it stops earlier, a new
                     :  * page is appended with blob_serialize_add_page() and *buf / *remaining_sz are
                     :  * reset to that page's descriptor area. Nothing is written when the blob has no
                     :  * clusters.
                     :  *
                     :  * cur_page is received by value, so the refreshed page pointer is not visible to
                     :  * the caller.
                     :  *
                     :  * Returns 0 on success or the negative value returned by blob_serialize_add_page().
                     :  * NOTE(review): on failure *pages is left untouched here, whereas
                     :  * blob_serialize_xattrs() frees it - confirm that the caller releases it.
                     :  */
    1226             : static int
    1227        1929 : blob_serialize_extents_rle(const struct spdk_blob *blob,
    1228             :                            struct spdk_blob_md_page **pages,
    1229             :                            struct spdk_blob_md_page *cur_page,
    1230             :                            uint32_t *page_count, uint8_t **buf,
    1231             :                            size_t *remaining_sz)
    1232             : {
    1233        1929 :         uint64_t                                last_cluster;
    1234             :         int                                     rc;
    1235             : 
    1236        1929 :         last_cluster = 0;
    1237        1951 :         while (last_cluster < blob->active.num_clusters) {
    1238        1737 :                 blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
    1239             : 
    1240        1737 :                 if (last_cluster == blob->active.num_clusters) {
    1241        1715 :                         break;
    1242             :                 }
    1243             : 
    1244          22 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1245          22 :                 if (rc < 0) {
    1246           0 :                         return rc;
    1247             :                 }
    1248             : 
    1249          22 :                 *buf = (uint8_t *)cur_page->descriptors; /* continue at the start of the new page */
    1250          22 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1251             :         }
    1252             : 
    1253        1929 :         return 0;
    1254             : }
    1255             : /*
                     :  * Fill 'page' with a single EXTENT_PAGE descriptor for the group of clusters that
                     :  * contains 'cluster'.
                     :  *
                     :  * The group starts at 'cluster' rounded down to a multiple of SPDK_EXTENTS_PER_EP.
                     :  * One cluster_idx entry (lba / lba_per_cluster) is written per cluster, stopping
                     :  * after SPDK_EXTENTS_PER_EP entries or at blob->active.num_clusters, whichever
                     :  * comes first. The descriptor is written at the very start of page->descriptors;
                     :  * no other field of the page is touched here.
                     :  */
    1256             : static void
    1257        1100 : blob_serialize_extent_page(const struct spdk_blob *blob,
    1258             :                            uint64_t cluster, struct spdk_blob_md_page *page)
    1259             : {
    1260             :         struct spdk_blob_md_descriptor_extent_page *desc_extent;
    1261             :         uint64_t i, extent_idx;
    1262             :         uint64_t lba, lba_per_cluster;
    1263        1100 :         uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    1264             : 
    1265        1100 :         desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
    1266        1100 :         desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
    1267             : 
    1268        1100 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1269             : 
    1270        1100 :         desc_extent->start_cluster_idx = start_cluster_idx;
    1271        1100 :         extent_idx = 0;
    1272       42406 :         for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
    1273       41372 :                 lba = blob->active.clusters[i];
    1274       41372 :                 desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
    1275       41372 :                 if (extent_idx >= SPDK_EXTENTS_PER_EP) { /* this extent page is full */
    1276          66 :                         break;
    1277             :                 }
    1278             :         }
    1279        1100 :         desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
    1280             :                               sizeof(desc_extent->cluster_idx[0]) * extent_idx;
    1281        1100 : }
    1282             : /*
                     :  * Write the FLAGS descriptor (invalid, data_ro and md_ro flags of 'blob') at 'buf'.
                     :  *
                     :  * *buf_sz is reduced by the size of the descriptor. 'buf' is passed by value, so
                     :  * the caller has to advance its own cursor by
                     :  * sizeof(struct spdk_blob_md_descriptor_flags) afterwards.
                     :  */
    1283             : static void
    1284        3596 : blob_serialize_flags(const struct spdk_blob *blob,
    1285             :                      uint8_t *buf, size_t *buf_sz)
    1286             : {
    1287             :         struct spdk_blob_md_descriptor_flags *desc;
    1288             : 
    1289             :         /*
    1290             :          * Flags get serialized first, so we should always have room for the flags
    1291             :          *  descriptor.
    1292             :          */
    1293        3596 :         assert(*buf_sz >= sizeof(*desc));
    1294             : 
    1295        3596 :         desc = (struct spdk_blob_md_descriptor_flags *)buf;
    1296        3596 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
    1297        3596 :         desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor); /* payload only, header excluded */
    1298        3596 :         desc->invalid_flags = blob->invalid_flags;
    1299        3596 :         desc->data_ro_flags = blob->data_ro_flags;
    1300        3596 :         desc->md_ro_flags = blob->md_ro_flags;
    1301             : 
    1302        3596 :         *buf_sz -= sizeof(*desc);
    1303        3596 : }
    1304             : /*
                     :  * Serialize every xattr on the list 'xattrs' into the metadata page chain.
                     :  *
                     :  * Each xattr is first tried at the current position (*buf, *remaining_sz). If it
                     :  * does not fit, a new page is appended with blob_serialize_add_page() and the
                     :  * xattr is retried once at the start of that page. After each xattr *buf and
                     :  * *remaining_sz are moved past the bytes it used. 'internal' selects the
                     :  * descriptor type written by blob_serialize_xattr().
                     :  *
                     :  * cur_page is received by value, so the refreshed page pointer is not visible to
                     :  * the caller.
                     :  *
                     :  * Returns 0 on success. On failure (no page could be added, or the xattr does not
                     :  * fit even on an empty page) *pages is freed, *pages and *page_count are reset to
                     :  * NULL / 0, and the negative error is returned.
                     :  */
    1305             : static int
    1306        7192 : blob_serialize_xattrs(const struct spdk_blob *blob,
    1307             :                       const struct spdk_xattr_tailq *xattrs, bool internal,
    1308             :                       struct spdk_blob_md_page **pages,
    1309             :                       struct spdk_blob_md_page *cur_page,
    1310             :                       uint32_t *page_count, uint8_t **buf,
    1311             :                       size_t *remaining_sz)
    1312             : {
    1313             :         const struct spdk_xattr *xattr;
    1314             :         int     rc;
    1315             : 
    1316        8915 :         TAILQ_FOREACH(xattr, xattrs, link) {
    1317        1723 :                 size_t required_sz = 0;
    1318             : 
    1319        1723 :                 rc = blob_serialize_xattr(xattr,
    1320             :                                           *buf, *remaining_sz,
    1321             :                                           &required_sz, internal);
    1322        1723 :                 if (rc < 0) {
    1323             :                         /* Need to add a new page to the chain */
    1324          48 :                         rc = blob_serialize_add_page(blob, pages, page_count,
    1325             :                                                      &cur_page);
    1326          48 :                         if (rc < 0) {
    1327           0 :                                 spdk_free(*pages);
    1328           0 :                                 *pages = NULL;
    1329           0 :                                 *page_count = 0;
    1330           0 :                                 return rc;
    1331             :                         }
    1332             : 
    1333          48 :                         *buf = (uint8_t *)cur_page->descriptors;
    1334          48 :                         *remaining_sz = sizeof(cur_page->descriptors);
    1335             : 
    1336             :                         /* Try again */
    1337          48 :                         required_sz = 0;
    1338          48 :                         rc = blob_serialize_xattr(xattr,
    1339             :                                                   *buf, *remaining_sz,
    1340             :                                                   &required_sz, internal);
    1341             : 
    1342          48 :                         if (rc < 0) { /* does not fit even on an empty page */
    1343           0 :                                 spdk_free(*pages);
    1344           0 :                                 *pages = NULL;
    1345           0 :                                 *page_count = 0;
    1346           0 :                                 return rc;
    1347             :                         }
    1348             :                 }
    1349             : 
    1350        1723 :                 *remaining_sz -= required_sz;
    1351        1723 :                 *buf += required_sz;
    1352             :         }
    1353             : 
    1354        7192 :         return 0;
    1355             : }
    1356             : /*
                     :  * Build the on-disk metadata page chain for a dirty blob.
                     :  *
                     :  * On entry *pages and *page_count are reset to NULL / 0. A first page is always
                     :  * allocated; then the flags descriptor, the regular xattrs, the internal xattrs
                     :  * and finally either the extent table (blob->use_extent_table) or the RLE extents
                     :  * are serialized in that order, adding pages as required.
                     :  *
                     :  * The local cur_page is handed to the helpers by value; they only dereference it
                     :  * after refreshing it through blob_serialize_add_page(), while the write position
                     :  * is carried across helpers by buf / remaining_sz.
                     :  *
                     :  * Returns 0 on success or a negative value from the failing helper.
                     :  */
    1357             : static int
    1358        3596 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
    1359             :                uint32_t *page_count)
    1360             : {
    1361        3596 :         struct spdk_blob_md_page                *cur_page;
    1362             :         int                                     rc;
    1363        3596 :         uint8_t                                 *buf;
    1364        3596 :         size_t                                  remaining_sz;
    1365             : 
    1366        3596 :         assert(pages != NULL);
    1367        3596 :         assert(page_count != NULL);
    1368        3596 :         assert(blob != NULL);
    1369        3596 :         assert(blob->state == SPDK_BLOB_STATE_DIRTY);
    1370             : 
    1371        3596 :         *pages = NULL;
    1372        3596 :         *page_count = 0;
    1373             : 
    1374             :         /* A blob always has at least 1 page, even if it has no descriptors */
    1375        3596 :         rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1376        3596 :         if (rc < 0) {
    1377           0 :                 return rc;
    1378             :         }
    1379             : 
    1380        3596 :         buf = (uint8_t *)cur_page->descriptors;
    1381        3596 :         remaining_sz = sizeof(cur_page->descriptors);
    1382             : 
    1383             :         /* Serialize flags */
    1384        3596 :         blob_serialize_flags(blob, buf, &remaining_sz);
    1385        3596 :         buf += sizeof(struct spdk_blob_md_descriptor_flags); /* the helper only updates remaining_sz */
    1386             : 
    1387             :         /* Serialize xattrs */
    1388        3596 :         rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
    1389             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1390        3596 :         if (rc < 0) {
    1391           0 :                 return rc;
    1392             :         }
    1393             : 
    1394             :         /* Serialize internal xattrs */
    1395        3596 :         rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
    1396             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1397        3596 :         if (rc < 0) {
    1398           0 :                 return rc;
    1399             :         }
    1400             : 
    1401        3596 :         if (blob->use_extent_table) {
    1402             :                 /* Serialize extent table */
    1403        1667 :                 rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1404             :         } else {
    1405             :                 /* Serialize extents */
    1406        1929 :                 rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1407             :         }
    1408             : 
    1409        3596 :         return rc;
    1410             : }
    1411             : /* State carried through the callbacks of a blob load; released by blob_load_final(). */
    1412             : struct spdk_blob_load_ctx {
    1413             :         struct spdk_blob                *blob; /* blob being loaded */
    1414             : 
    1415             :         struct spdk_blob_md_page        *pages; /* metadata page buffer; spdk_free()d in blob_load_final() */
    1416             :         uint32_t                        num_pages; /* number of pages held in 'pages' */
    1417             :         uint32_t                        next_extent_page; /* extent page index at which the extent-page loop resumes */
    1418             :         spdk_bs_sequence_t              *seq; /* sequence handed back to cb_fn */
    1419             : 
    1420             :         spdk_bs_sequence_cpl            cb_fn; /* completion invoked once by blob_load_final() */
    1421             :         void                            *cb_arg; /* opaque argument for cb_fn */
    1422             : };
    1423             : /*
                     :  * Compute the CRC32C of a metadata page.
                     :  *
                     :  * The checksum covers the first SPDK_BS_PAGE_SIZE - 4 bytes of 'page', starts from
                     :  * BLOB_CRC32C_INITIAL and is XORed with the same constant at the end. The last 4
                     :  * bytes are excluded - presumably they hold the page's stored crc field; confirm
                     :  * against the struct spdk_blob_md_page layout.
                     :  */
    1424             : static uint32_t
    1425       19930 : blob_md_page_calc_crc(void *page)
    1426             : {
    1427             :         uint32_t                crc;
    1428             : 
    1429       19930 :         crc = BLOB_CRC32C_INITIAL;
    1430       19930 :         crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
    1431       19930 :         crc ^= BLOB_CRC32C_INITIAL;
    1432             : 
    1433       19930 :         return crc;
    1434             : 
    1435             : }
    1436             : /*
                     :  * Finish a blob load: mark the blob clean when bserrno is 0, report bserrno to the
                     :  * stored completion callback, then free the page buffer and the context itself.
                     :  * 'ctx' must not be used by the caller after this returns.
                     :  */
    1437             : static void
    1438        3466 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
    1439             : {
    1440        3466 :         struct spdk_blob                *blob = ctx->blob;
    1441             : 
    1442        3466 :         if (bserrno == 0) {
    1443        3402 :                 blob_mark_clean(blob);
    1444             :         }
    1445             : 
    1446        3466 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno); /* ctx is still allocated while the callback runs */
    1447             : 
    1448             :         /* Free the memory */
    1449        3466 :         spdk_free(ctx->pages);
    1450        3466 :         free(ctx);
    1451        3466 : }
    1452             : /*
                     :  * Completion callback for opening the parent snapshot of a blob being loaded
                     :  * (cb_arg is the struct spdk_blob_load_ctx).
                     :  *
                     :  * On success a blob-backed bs_dev is created from 'snapshot' and installed as the
                     :  * blob's back_bs_dev; if that creation returns NULL the load fails with -ENOMEM.
                     :  * Any failure is logged, and the load is always finished through
                     :  * blob_load_final().
                     :  */
    1453             : static void
    1454         454 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    1455             : {
    1456         454 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1457         454 :         struct spdk_blob                *blob = ctx->blob;
    1458             : 
    1459         454 :         if (bserrno == 0) {
    1460         448 :                 blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
    1461         448 :                 if (blob->back_bs_dev == NULL) {
    1462           0 :                         bserrno = -ENOMEM;
    1463             :                 }
    1464             :         }
    1465         454 :         if (bserrno != 0) { /* covers both the open failure and the -ENOMEM set above */
    1466           6 :                 SPDK_ERRLOG("Snapshot fail\n");
    1467             :         }
    1468             : 
    1469         454 :         blob_load_final(ctx, bserrno);
    1470         454 : }
    1471             : /* Forward declaration; the function body is defined elsewhere in this file. */
    1472             : static void blob_update_clear_method(struct spdk_blob *blob);
    1473             : /*
                     :  * Set up the backing device of an external-snapshot (esnap) clone.
                     :  *
                     :  * The esnap ID is read from the blob's BLOB_EXTERNAL_SNAPSHOT_ID xattr and passed,
                     :  * together with 'blob_ctx', to the blobstore's esnap_bs_dev_create callback. The
                     :  * device it returns (which may be NULL) becomes blob->back_bs_dev and
                     :  * blob->parent_id is set to SPDK_BLOBID_EXTERNAL_SNAPSHOT.
                     :  *
                     :  * Returns 0 on success, -ENOTSUP when the blobstore has no esnap_bs_dev_create
                     :  * callback, -EINVAL when the xattr is missing or when io_unit_size is not a
                     :  * multiple of the device block length (the device is destroyed in that case), or
                     :  * the non-zero value returned by the callback.
                     :  */
    1474             : static int
    1475         120 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx)
    1476             : {
    1477         120 :         struct spdk_blob_store *bs = blob->bs;
    1478         120 :         struct spdk_bs_dev *bs_dev = NULL;
    1479         120 :         const void *esnap_id = NULL;
    1480         120 :         size_t id_len = 0;
    1481             :         int rc;
    1482             : 
    1483         120 :         if (bs->esnap_bs_dev_create == NULL) {
    1484           8 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
    1485             :                                "without support for esnap clones\n", blob->id);
    1486           8 :                 return -ENOTSUP;
    1487             :         }
    1488         112 :         assert(blob->back_bs_dev == NULL);
    1489             : 
    1490         112 :         rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
    1491         112 :         if (rc != 0) {
    1492           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
    1493           0 :                 return -EINVAL;
    1494             :         }
    1495         112 :         assert(id_len > 0 && id_len < UINT32_MAX); /* id_len is narrowed to uint32_t below */
    1496             : 
    1497         112 :         SPDK_INFOLOG(blob, "Creating external snapshot device\n");
    1498             : 
    1499         112 :         rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
    1500             :                                      &bs_dev);
    1501         112 :         if (rc != 0) {
    1502           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
    1503             :                               "with error %d\n", blob->id, rc);
    1504           0 :                 return rc;
    1505             :         }
    1506             : 
    1507             :         /*
    1508             :          * Note: bs_dev might be NULL if the consumer chose to not open the external snapshot.
    1509             :          * This especially might happen during spdk_bs_load() iteration.
    1510             :          */
    1511         112 :         if (bs_dev != NULL) {
    1512         112 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
    1513         112 :                 if ((bs->io_unit_size % bs_dev->blocklen) != 0) {
    1514           4 :                         SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
    1515             :                                        "is not compatible with blobstore block size %u\n",
    1516             :                                        blob->id, bs_dev->blocklen, bs->io_unit_size);
    1517           4 :                         bs_dev->destroy(bs_dev); /* release the device that was just created */
    1518           4 :                         return -EINVAL;
    1519             :                 }
    1520             :         }
    1521             : 
    1522         108 :         blob->back_bs_dev = bs_dev;
    1523         108 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    1524             : 
    1525         108 :         return 0;
    1526             : }
    1527             : /*
                     :  * Choose and attach the backing device of a blob being loaded, then finish the
                     :  * load (cb_arg is the struct spdk_blob_load_ctx).
                     :  *
                     :  *  - esnap clone: delegate to blob_load_esnap(), using the esnap context stored in
                     :  *    the sequence's blob_handle completion, and finish with its return code.
                     :  *  - thin provisioned with a BLOB_SNAPSHOT xattr: the value must be exactly one
                     :  *    spdk_blob_id (otherwise the load fails with -EINVAL); it becomes
                     :  *    blob->parent_id and the snapshot is opened, with the load finishing in
                     :  *    blob_load_snapshot_cpl().
                     :  *  - thin provisioned without that xattr: back the blob with the zeroes device.
                     :  *  - anything else: no backing device (back_bs_dev = NULL).
                     :  */
    1528             : static void
    1529        3420 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg)
    1530             : {
    1531        3420 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1532        3420 :         struct spdk_blob                *blob = ctx->blob;
    1533        3420 :         const void                      *value;
    1534        3420 :         size_t                          len;
    1535             :         int                             rc;
    1536             : 
    1537        3420 :         if (blob_is_esnap_clone(blob)) {
    1538         120 :                 rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
    1539         120 :                 blob_load_final(ctx, rc);
    1540         120 :                 return;
    1541             :         }
    1542             : 
    1543        3300 :         if (spdk_blob_is_thin_provisioned(blob)) {
    1544        1030 :                 rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
    1545        1030 :                 if (rc == 0) {
    1546         454 :                         if (len != sizeof(spdk_blob_id)) { /* stored parent ID has an unexpected size */
    1547           0 :                                 blob_load_final(ctx, -EINVAL);
    1548           0 :                                 return;
    1549             :                         }
    1550             :                         /* open snapshot blob and continue in the callback function */
    1551         454 :                         blob->parent_id = *(spdk_blob_id *)value;
    1552         454 :                         spdk_bs_open_blob(blob->bs, blob->parent_id,
    1553             :                                           blob_load_snapshot_cpl, ctx);
    1554         454 :                         return;
    1555             :                 } else {
    1556             :                         /* add zeroes_dev for thin provisioned blob */
    1557         576 :                         blob->back_bs_dev = bs_create_zeroes_dev();
    1558             :                 }
    1559             :         } else {
    1560             :                 /* standard blob */
    1561        2270 :                 blob->back_bs_dev = NULL;
    1562             :         }
    1563        2846 :         blob_load_final(ctx, 0);
    1564             : }
    1565             : /*
                        * Sequence completion callback that reads and parses a blob's extent pages
                        * one at a time. cb_arg is the struct spdk_blob_load_ctx of the load and
                        * bserrno is the status of the read that just finished.
                        *
                        * The first invocation (ctx->pages == NULL) allocates a single-page buffer;
                        * later invocations first verify the page that was just read (CRC match,
                        * no chained page) and parse it with blob_parse_extent_page(). The function
                        * then walks blob->active.extent_pages from ctx->next_extent_page: an
                        * allocated entry triggers a read with this function as the completion,
                        * while a zero entry only extends the cluster array with zeroed slots.
                        * Once the walk finishes, loading continues in blob_load_backing_dev().
                        * Every failure ends the load through blob_load_final() with an error code.
                        */
    1566             : static void
    1567        2816 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1568             : {
    1569        2816 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1570        2816 :         struct spdk_blob                *blob = ctx->blob;
    1571             :         struct spdk_blob_md_page        *page;
    1572             :         uint64_t                        i;
    1573             :         uint32_t                        crc;
    1574             :         uint64_t                        lba;
    1575             :         void                            *tmp;
    1576             :         uint64_t                        sz;
    1577             : 
    1578        2816 :         if (bserrno) {
    1579           6 :                 SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
    1580           6 :                 blob_load_final(ctx, bserrno);
    1581           6 :                 return;
    1582             :         }
    1583             : 
    1584        2810 :         if (ctx->pages == NULL) {
    1585             :                 /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
    1586        1764 :                 ctx->pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    1587             :                                           NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    1588        1764 :                 if (!ctx->pages) {
    1589           0 :                         blob_load_final(ctx, -ENOMEM);
    1590           0 :                         return;
    1591             :                 }
    1592        1764 :                 ctx->num_pages = 1;
    1593        1764 :                 ctx->next_extent_page = 0;
    1594             :         } else {
                                       /* A read issued by the loop below has completed: validate the
                                        * page in the buffer and parse it before moving on. */
    1595        1046 :                 page = &ctx->pages[0];
    1596        1046 :                 crc = blob_md_page_calc_crc(page);
    1597        1046 :                 if (crc != page->crc) {
    1598           0 :                         blob_load_final(ctx, -EINVAL);
    1599           0 :                         return;
    1600             :                 }
    1601             : 
                                       /* An extent page must not link to a further metadata page. */
    1602        1046 :                 if (page->next != SPDK_INVALID_MD_PAGE) {
    1603           0 :                         blob_load_final(ctx, -EINVAL);
    1604           0 :                         return;
    1605             :                 }
    1606             : 
    1607        1046 :                 bserrno = blob_parse_extent_page(page, blob);
    1608        1046 :                 if (bserrno) {
    1609           0 :                         blob_load_final(ctx, bserrno);
    1610           0 :                         return;
    1611             :                 }
    1612             :         }
    1613             : 
    1614        3232 :         for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
    1615        1474 :                 if (blob->active.extent_pages[i] != 0) {
    1616             :                         /* Extent page was allocated, read and parse it. */
    1617        1052 :                         lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
    1618        1052 :                         ctx->next_extent_page = i + 1;
    1619             : 
    1620        1052 :                         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1621        1052 :                                              bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    1622             :                                              blob_load_cpl_extents_cpl, ctx);
    1623        1052 :                         return;
    1624             :                 } else {
    1625             :                         /* Thin provisioned blobs can point to unallocated extent pages.
    1626             :                          * In this case blob size should be increased by up to the amount left in remaining_clusters_in_et. */
    1627             : 
    1628         422 :                         sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
    1629         422 :                         blob->active.num_clusters += sz;
    1630         422 :                         blob->remaining_clusters_in_et -= sz;
    1631             : 
    1632         422 :                         assert(spdk_blob_is_thin_provisioned(blob));
    1633         422 :                         assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
    1634             : 
    1635         422 :                         tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
    1636         422 :                         if (tmp == NULL) {
    1637           0 :                                 blob_load_final(ctx, -ENOMEM);
    1638           0 :                                 return;
    1639             :                         }
                                               /* Zero only the newly added tail of the array.
                                                * NOTE(review): the offset is added to a void pointer, which
                                                * relies on a compiler extension (byte-sized arithmetic). */
    1640         422 :                         memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
    1641         422 :                                sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
    1642         422 :                         blob->active.clusters = tmp;
    1643         422 :                         blob->active.cluster_array_size = blob->active.num_clusters;
    1644             :                 }
    1645             :         }
    1646             : 
                               /* Every extent page has been read or accounted for. */
    1647        1758 :         blob_load_backing_dev(seq, ctx);
    1648             : }
    1649             : /*
                        * Sequence completion callback for each metadata page read of a blob load.
                        * cb_arg is the struct spdk_blob_load_ctx; bserrno is the read status.
                        *
                        * The newest page (ctx->pages[num_pages - 1]) is CRC-checked. If it links
                        * to another page, the buffer is grown by one page and that page is read
                        * with this function as the completion again. Otherwise the whole chain is
                        * handed to blob_parse(), the extent-table mode is selected, the page
                        * buffer is freed, and the load proceeds to the extent pages or directly
                        * to the backing device. Failures end the load via blob_load_final().
                        */
    1650             : static void
    1651        3566 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1652             : {
    1653        3566 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1654        3566 :         struct spdk_blob                *blob = ctx->blob;
    1655             :         struct spdk_blob_md_page        *page;
    1656             :         int                             rc;
    1657             :         uint32_t                        crc;
    1658             :         uint32_t                        current_page;
    1659             : 
                               /* Work out which metadata page this completion refers to; the value
                                * is only used in the error messages below. */
    1660        3566 :         if (ctx->num_pages == 1) {
    1661        3466 :                 current_page = bs_blobid_to_page(blob->id);
    1662             :         } else {
    1663         100 :                 assert(ctx->num_pages != 0);
    1664         100 :                 page = &ctx->pages[ctx->num_pages - 2];
    1665         100 :                 current_page = page->next;
    1666             :         }
    1667             : 
    1668        3566 :         if (bserrno) {
    1669          20 :                 SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
    1670             :                             current_page, blob->id, bserrno);
    1671          20 :                 blob_load_final(ctx, bserrno);
    1672          20 :                 return;
    1673             :         }
    1674             : 
    1675        3546 :         page = &ctx->pages[ctx->num_pages - 1];
    1676        3546 :         crc = blob_md_page_calc_crc(page);
    1677        3546 :         if (crc != page->crc) {
    1678           8 :                 SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
    1679             :                             current_page, blob->id);
    1680           8 :                 blob_load_final(ctx, -EINVAL);
    1681           8 :                 return;
    1682             :         }
    1683             : 
    1684        3538 :         if (page->next != SPDK_INVALID_MD_PAGE) {
    1685             :                 struct spdk_blob_md_page *tmp_pages;
    1686         100 :                 uint32_t next_page = page->next;
    1687         100 :                 uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
    1688             : 
    1689             :                 /* Read the next page */
                                       /* next_page/next_lba were captured above because the buffer
                                        * that 'page' points into may move when it is reallocated. */
    1690         100 :                 tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
    1691         100 :                 if (tmp_pages == NULL) {
    1692           0 :                         blob_load_final(ctx, -ENOMEM);
    1693           0 :                         return;
    1694             :                 }
    1695         100 :                 ctx->num_pages++;
    1696         100 :                 ctx->pages = tmp_pages;
    1697             : 
    1698         100 :                 bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
    1699             :                                      next_lba,
    1700         100 :                                      bs_byte_to_lba(blob->bs, sizeof(*page)),
    1701             :                                      blob_load_cpl, ctx);
    1702         100 :                 return;
    1703             :         }
    1704             : 
    1705             :         /* Parse the pages */
    1706        3438 :         rc = blob_parse(ctx->pages, ctx->num_pages, blob);
    1707        3438 :         if (rc) {
    1708          12 :                 blob_load_final(ctx, rc);
    1709          12 :                 return;
    1710             :         }
    1711             : 
    1712        3426 :         if (blob->extent_table_found == true) {
    1713             :                 /* If EXTENT_TABLE was found, that means support for it should be enabled. */
    1714        1764 :                 assert(blob->extent_rle_found == false);
    1715        1764 :                 blob->use_extent_table = true;
    1716             :         } else {
    1717             :                 /* If EXTENT_RLE or no extent_* descriptor was found disable support
    1718             :                  * for extent table. No extent_* descriptors means that blob has length of 0
    1719             :                  * and no extent_rle descriptors were persisted for it.
    1720             :                  * EXTENT_TABLE if used, is always present in metadata regardless of length. */
    1721        1662 :                 blob->use_extent_table = false;
    1722             :         }
    1723             : 
    1724             :         /* Check the clear_method stored in metadata vs what may have been passed
    1725             :          * via spdk_bs_open_blob_ext() and update accordingly.
    1726             :          */
    1727        3426 :         blob_update_clear_method(blob);
    1728             : 
                               /* Resetting ctx->pages to NULL is what makes the extent-page loader
                                * below treat its first call as the first iteration. */
    1729        3426 :         spdk_free(ctx->pages);
    1730        3426 :         ctx->pages = NULL;
    1731             : 
    1732        3426 :         if (blob->extent_table_found) {
    1733        1764 :                 blob_load_cpl_extents_cpl(seq, ctx, 0);
    1734             :         } else {
    1735        1662 :                 blob_load_backing_dev(seq, ctx);
    1736             :         }
    1737             : }
    1738             : 
    1739             : /* Load a blob from disk given a blobid.
                        *
                        * Allocates a struct spdk_blob_load_ctx and a one-page buffer, marks the
                        * blob SPDK_BLOB_STATE_LOADING and reads the metadata page derived from
                        * blob->id; blob_load_cpl() handles the read completion. cb_fn/cb_arg are
                        * stored in the context for later use. If either allocation fails, cb_fn
                        * is called right away with -ENOMEM and nothing is read.
                        */
    1740             : static void
    1741        3466 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    1742             :           spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    1743             : {
    1744             :         struct spdk_blob_load_ctx *ctx;
    1745             :         struct spdk_blob_store *bs;
    1746             :         uint32_t page_num;
    1747             :         uint64_t lba;
    1748             : 
    1749        3466 :         blob_verify_md_op(blob);
    1750             : 
    1751        3466 :         bs = blob->bs;
    1752             : 
    1753        3466 :         ctx = calloc(1, sizeof(*ctx));
    1754        3466 :         if (!ctx) {
    1755           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1756           0 :                 return;
    1757             :         }
    1758             : 
    1759        3466 :         ctx->blob = blob;
                               /* ctx->pages is still NULL here because ctx came from calloc();
                                * presumably spdk_realloc() then acts as a fresh allocation - TODO confirm. */
    1760        3466 :         ctx->pages = spdk_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, 0);
    1761        3466 :         if (!ctx->pages) {
    1762           0 :                 free(ctx);
    1763           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1764           0 :                 return;
    1765             :         }
    1766        3466 :         ctx->num_pages = 1;
    1767        3466 :         ctx->cb_fn = cb_fn;
    1768        3466 :         ctx->cb_arg = cb_arg;
    1769        3466 :         ctx->seq = seq;
    1770             : 
    1771        3466 :         page_num = bs_blobid_to_page(blob->id);
    1772        3466 :         lba = bs_md_page_to_lba(blob->bs, page_num);
    1773             : 
    1774        3466 :         blob->state = SPDK_BLOB_STATE_LOADING;
    1775             : 
    1776        3466 :         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1777        3466 :                              bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE),
    1778             :                              blob_load_cpl, ctx);
    1779             : }
    1780             : /*
                        * State carried through the asynchronous steps of one blob persist
                        * request; it is passed as cb_arg to the blob_persist_* callbacks.
                        */
    1781             : struct spdk_blob_persist_ctx {
                               /* Blob being persisted. */
    1782             :         struct spdk_blob                *blob;
    1783             : 
                               /* Page buffer; released with spdk_free() in blob_persist_complete_cb(). */
    1784             :         struct spdk_blob_md_page        *pages;
    1785             :         uint32_t                        next_extent_page;
    1786             :         struct spdk_blob_md_page        *extent_page;
    1787             : 
                               /* Completion to run as cb_fn(seq, cb_arg, status) when the persist finishes. */
    1788             :         spdk_bs_sequence_t              *seq;
    1789             :         spdk_bs_sequence_cpl            cb_fn;
    1790             :         void                            *cb_arg;
                               /* Links the context on the blob's persists_to_complete or pending_persists list. */
    1791             :         TAILQ_ENTRY(spdk_blob_persist_ctx) link;
    1792             : };
    1793             : /*
                        * Add to batch an operation that clears the range described by lba and
                        * lba_count, chosen by blob->clear_method: unmap for BLOB_CLEAR_WITH_DEFAULT
                        * and BLOB_CLEAR_WITH_UNMAP, write-zeroes for BLOB_CLEAR_WITH_WRITE_ZEROES,
                        * and nothing at all for BLOB_CLEAR_WITH_NONE or any other value.
                        */
    1794             : static void
    1795        1262 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
    1796             :                    uint64_t lba_count)
    1797             : {
    1798        1262 :         switch (blob->clear_method) {
    1799        1262 :         case BLOB_CLEAR_WITH_DEFAULT:
    1800             :         case BLOB_CLEAR_WITH_UNMAP:
    1801        1262 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    1802        1262 :                 break;
    1803           0 :         case BLOB_CLEAR_WITH_WRITE_ZEROES:
    1804           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1805           0 :                 break;
    1806           0 :         case BLOB_CLEAR_WITH_NONE:
    1807             :         default:
                                       /* Intentionally a no-op: the range is left untouched. */
    1808           0 :                 break;
    1809             :         }
    1810        1262 : }
    1811             : /*
                        * Check a super block against this build and the given blobstore.
                        *
                        * Returns 0 when the super block is acceptable. Returns -EILSEQ when the
                        * version is outside [SPDK_BS_INITIAL_VERSION, SPDK_BS_VERSION], when the
                        * signature or CRC does not match, or when super->size exceeds the device
                        * capacity (blockcnt * blocklen). Returns -ENXIO when the bstype differs
                        * from bs->bstype and bs->bstype is not all zeroes.
                        */
    1812             : static int
    1813        1152 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
    1814             : {
    1815             :         uint32_t        crc;
                               /* Static storage, hence all zero bytes: the "wildcard" bstype. */
    1816             :         static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
    1817             : 
    1818        1152 :         if (super->version > SPDK_BS_VERSION ||
    1819        1148 :             super->version < SPDK_BS_INITIAL_VERSION) {
    1820           8 :                 return -EILSEQ;
    1821             :         }
    1822             : 
    1823        1144 :         if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    1824             :                    sizeof(super->signature)) != 0) {
    1825           0 :                 return -EILSEQ;
    1826             :         }
    1827             : 
    1828        1144 :         crc = blob_md_page_calc_crc(super);
    1829        1144 :         if (crc != super->crc) {
    1830           4 :                 return -EILSEQ;
    1831             :         }
    1832             : 
                               /* Accept an exact bstype match, or any bstype when the caller's
                                * bs->bstype is all zeroes; anything else is rejected. */
    1833        1140 :         if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1834        1126 :                 SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
    1835          14 :         } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1836           6 :                 SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless bstype\n");
    1837             :         } else {
    1838           8 :                 SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
    1839           8 :                 SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1840           8 :                 SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1841           8 :                 return -ENXIO;
    1842             :         }
    1843             : 
                               /* The recorded blobstore size must fit on the device. */
    1844        1132 :         if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
    1845           8 :                 SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
    1846             :                                bs->dev->blockcnt * bs->dev->blocklen, super->size);
    1847           8 :                 return -EILSEQ;
    1848             :         }
    1849             : 
    1850        1124 :         return 0;
    1851             : }
    1852             : 
    1853             : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    1854             :                           spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    1855             : /*
                        * Message handler, queued by blob_persist_complete(), that finishes one
                        * persist request: it invokes the stored callback, then frees the page
                        * buffer and the context itself. arg is the struct spdk_blob_persist_ctx
                        * and must not be used after this function returns.
                        *
                        * NOTE(review): the callback always receives status 0, although
                        * blob_persist_complete() queues this handler for failed persists too -
                        * confirm that dropping the error code is intended.
                        */
    1856             : static void
    1857        5092 : blob_persist_complete_cb(void *arg)
    1858             : {
    1859        5092 :         struct spdk_blob_persist_ctx *ctx = arg;
    1860             : 
    1861             :         /* Call user callback */
    1862        5092 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
    1863             : 
    1864             :         /* Free the memory */
    1865        5092 :         spdk_free(ctx->pages);
    1866        5092 :         free(ctx);
    1867        5092 : }
    1868             : 
    1869             : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
    1870             : /*
                        * Finish the persist that is currently in flight for ctx->blob.
                        *
                        * When bserrno is 0 the blob is marked clean. Every context on
                        * blob->persists_to_complete is unlinked and its completion is deferred to
                        * blob_persist_complete_cb() through a message to the current thread. If
                        * further persists were queued on blob->pending_persists, they become the
                        * new persists_to_complete list, the blob is set back to
                        * SPDK_BLOB_STATE_DIRTY and bs_mark_dirty() is called with
                        * blob_persist_start as its completion for the first queued context.
                        */
    1871             : static void
    1872        5092 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
    1873             : {
    1874             :         struct spdk_blob_persist_ctx    *next_persist, *tmp;
    1875        5092 :         struct spdk_blob                *blob = ctx->blob;
    1876             : 
    1877        5092 :         if (bserrno == 0) {
    1878        5040 :                 blob_mark_clean(blob);
    1879             :         }
    1880             : 
                               /* The persist being completed is expected to head the list. */
    1881        5092 :         assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
    1882             : 
    1883             :         /* Complete all persists that were pending when the current persist started */
    1884       10184 :         TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
    1885        5092 :                 TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
    1886        5092 :                 spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
    1887             :         }
    1888             : 
    1889        5092 :         if (TAILQ_EMPTY(&blob->pending_persists)) {
    1890        5069 :                 return;
    1891             :         }
    1892             : 
    1893             :         /* Queue up all pending persists for completion and start blob persist with first one */
    1894          23 :         TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
    1895          23 :         next_persist = TAILQ_FIRST(&blob->persists_to_complete);
    1896             : 
    1897          23 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    1898          23 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
    1899             : }
    1900             : /*
                        * Completion of the batch issued by blob_persist_clear_extents().
                        *
                        * Under bs->used_lock, releases the metadata pages of all allocated extent
                        * pages at index >= blob->active.num_extent_pages, then frees or shrinks
                        * the extent_pages array to match and completes the persist. A non-zero
                        * bserrno completes the persist immediately with that error.
                        *
                        * NOTE(review): the realloc() result is only checked with assert(); in a
                        * build with NDEBUG a failed realloc would store NULL in
                        * blob->active.extent_pages.
                        */
    1901             : static void
    1902        5040 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1903             : {
    1904        5040 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1905        5040 :         struct spdk_blob                *blob = ctx->blob;
    1906        5040 :         struct spdk_blob_store          *bs = blob->bs;
    1907             :         size_t                          i;
    1908             : 
    1909        5040 :         if (bserrno != 0) {
    1910           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1911           0 :                 return;
    1912             :         }
    1913             : 
    1914        5040 :         spdk_spin_lock(&bs->used_lock);
    1915             : 
    1916             :         /* Release all extent_pages that were truncated */
    1917        6774 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1918             :                 /* Nothing to release if it was not allocated */
    1919        1734 :                 if (blob->active.extent_pages[i] != 0) {
    1920         626 :                         bs_release_md_page(bs, blob->active.extent_pages[i]);
    1921             :                 }
    1922             :         }
    1923             : 
    1924        5040 :         spdk_spin_unlock(&bs->used_lock);
    1925             : 
                               /* Bring the array allocation in line with the new page count. */
    1926        5040 :         if (blob->active.num_extent_pages == 0) {
    1927        3637 :                 free(blob->active.extent_pages);
    1928        3637 :                 blob->active.extent_pages = NULL;
    1929        3637 :                 blob->active.extent_pages_array_size = 0;
    1930        1403 :         } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
    1931             : #ifndef __clang_analyzer__
    1932             :                 void *tmp;
    1933             : 
    1934             :                 /* scan-build really can't figure reallocs, workaround it */
    1935           2 :                 tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
    1936           2 :                 assert(tmp != NULL);
    1937           2 :                 blob->active.extent_pages = tmp;
    1938             : #endif
    1939           2 :                 blob->active.extent_pages_array_size = blob->active.num_extent_pages;
    1940             :         }
    1941             : 
    1942        5040 :         blob_persist_complete(seq, ctx, bserrno);
    1943             : }
    1944             : /*
                        * Write zeroes over every allocated extent page that was truncated away,
                        * i.e. entries at index >= blob->active.num_extent_pages. All writes go
                        * into one batch whose completion is blob_persist_clear_extents_cpl().
                        */
    1945             : static void
    1946        5040 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    1947             : {
    1948        5040 :         struct spdk_blob                *blob = ctx->blob;
    1949        5040 :         struct spdk_blob_store          *bs = blob->bs;
    1950             :         size_t                          i;
    1951             :         uint64_t                        lba;
    1952             :         uint64_t                        lba_count;
    1953             :         spdk_bs_batch_t                 *batch;
    1954             : 
    1955        5040 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
                               /* Each extent page spans one SPDK_BS_PAGE_SIZE worth of LBAs. */
    1956        5040 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    1957             : 
    1958             :         /* Clear all extent_pages that were truncated */
    1959        6774 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1960             :                 /* Nothing to clear if it was not allocated */
    1961        1734 :                 if (blob->active.extent_pages[i] != 0) {
    1962         626 :                         lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
    1963         626 :                         bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1964             :                 }
    1965             :         }
    1966             : 
    1967        5040 :         bs_batch_close(batch);
    1968        5040 : }
    1969             : /*
                        * Completion of the batch issued by blob_persist_clear_clusters().
                        *
                        * Under bs->used_lock, releases every allocated cluster at index
                        * >= blob->active.num_clusters, then frees or shrinks the clusters array
                        * to match and continues with blob_persist_clear_extents(). A non-zero
                        * bserrno completes the persist immediately with that error.
                        *
                        * NOTE(review): the realloc() result is only checked with assert(); in a
                        * build with NDEBUG a failed realloc would store NULL in
                        * blob->active.clusters.
                        */
    1970             : static void
    1971        5040 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1972             : {
    1973        5040 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1974        5040 :         struct spdk_blob                *blob = ctx->blob;
    1975        5040 :         struct spdk_blob_store          *bs = blob->bs;
    1976             :         size_t                          i;
    1977             : 
    1978        5040 :         if (bserrno != 0) {
    1979           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1980           0 :                 return;
    1981             :         }
    1982             : 
    1983        5040 :         spdk_spin_lock(&bs->used_lock);
    1984             :         /* Release all clusters that were truncated */
    1985     1074047 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
                                       /* The array stores LBAs; convert to a cluster index for release. */
    1986     1069007 :                 uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
    1987             : 
    1988             :                 /* Nothing to release if it was not allocated */
    1989     1069007 :                 if (blob->active.clusters[i] != 0) {
    1990        2343 :                         bs_release_cluster(bs, cluster_num);
    1991             :                 }
    1992             :         }
    1993        5040 :         spdk_spin_unlock(&bs->used_lock);
    1994             : 
                               /* Bring the array allocation in line with the new cluster count. */
    1995        5040 :         if (blob->active.num_clusters == 0) {
    1996        1940 :                 free(blob->active.clusters);
    1997        1940 :                 blob->active.clusters = NULL;
    1998        1940 :                 blob->active.cluster_array_size = 0;
    1999        3100 :         } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
    2000             : #ifndef __clang_analyzer__
    2001             :                 void *tmp;
    2002             : 
    2003             :                 /* scan-build really can't figure reallocs, workaround it */
    2004          14 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
    2005          14 :                 assert(tmp != NULL);
    2006          14 :                 blob->active.clusters = tmp;
    2007             : 
    2008             : #endif
    2009          14 :                 blob->active.cluster_array_size = blob->active.num_clusters;
    2010             :         }
    2011             : 
    2012             :         /* Move on to clearing extent pages */
    2013        5040 :         blob_persist_clear_extents(seq, ctx);
    2014             : }
    2015             : /*
                        * Clear on disk the clusters that were truncated from the blob, i.e.
                        * entries at index >= blob->active.num_clusters, via bs_batch_clear_dev().
                        * Consecutive entries whose LBAs are contiguous are merged into a single
                        * clear operation; zero (unallocated) entries are skipped. The batch
                        * completes in blob_persist_clear_clusters_cpl().
                        */
    2016             : static void
    2017        5040 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2018             : {
    2019        5040 :         struct spdk_blob                *blob = ctx->blob;
    2020        5040 :         struct spdk_blob_store          *bs = blob->bs;
    2021             :         spdk_bs_batch_t                 *batch;
    2022             :         size_t                          i;
    2023             :         uint64_t                        lba;
    2024             :         uint64_t                        lba_count;
    2025             : 
    2026             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2027             :          * at the end, but no changes ever occur in the middle of the list.
    2028             :          */
    2029             : 
    2030        5040 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
    2031             : 
    2032             :         /* Clear all clusters that were truncated */
                               /* lba/lba_count describe the run being accumulated; lba_count == 0
                                * means no run is open yet. */
    2033        5040 :         lba = 0;
    2034        5040 :         lba_count = 0;
    2035     1074047 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    2036     1069007 :                 uint64_t next_lba = blob->active.clusters[i];
    2037     1069007 :                 uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
    2038             : 
    2039     1069007 :                 if (next_lba > 0 && (lba + lba_count) == next_lba) {
    2040             :                         /* This cluster is contiguous with the previous one. */
    2041        1085 :                         lba_count += next_lba_count;
    2042        1085 :                         continue;
    2043     1067922 :                 } else if (next_lba == 0) {
                                               /* Unallocated entry: nothing to clear, keep the current run open. */
    2044     1066664 :                         continue;
    2045             :                 }
    2046             : 
    2047             :                 /* This cluster is not contiguous with the previous one. */
    2048             : 
    2049             :                 /* If a run of LBAs previously existed, clear it now */
    2050        1258 :                 if (lba_count > 0) {
    2051          36 :                         bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2052             :                 }
    2053             : 
    2054             :                 /* Start building the next batch */
                                       /* next_lba is always non-zero here because zero entries were
                                        * skipped above, so the else branch is not expected to run. */
    2055        1258 :                 lba = next_lba;
    2056        1258 :                 if (next_lba > 0) {
    2057        1258 :                         lba_count = next_lba_count;
    2058             :                 } else {
    2059           0 :                         lba_count = 0;
    2060             :                 }
    2061             :         }
    2062             : 
    2063             :         /* If we ended with a contiguous set of LBAs, clear them now */
    2064        5040 :         if (lba_count > 0) {
    2065        1222 :                 bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2066             :         }
    2067             : 
    2068        5040 :         bs_batch_close(batch);
    2069        5040 : }
    2070             : /*
                        * Completion of the batch issued by blob_persist_zero_pages().
                        *
                        * Under bs->used_lock, releases every metadata page on the blob's clean
                        * list except the first, and the blob's first page as well when the blob
                        * has no active pages. It then moves on to blob_persist_clear_clusters().
                        * A non-zero bserrno completes the persist with that error instead.
                        */
    2071             : static void
    2072        5044 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2073             : {
    2074        5044 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2075        5044 :         struct spdk_blob                *blob = ctx->blob;
    2076        5044 :         struct spdk_blob_store          *bs = blob->bs;
    2077             :         size_t                          i;
    2078             : 
    2079        5044 :         if (bserrno != 0) {
    2080           4 :                 blob_persist_complete(seq, ctx, bserrno);
    2081           4 :                 return;
    2082             :         }
    2083             : 
    2084        5040 :         spdk_spin_lock(&bs->used_lock);
    2085             : 
    2086             :         /* This loop starts at 1 because the first page is special and handled
    2087             :          * below. The pages (except the first) are never written in place,
    2088             :          * so any pages in the clean list are no longer needed and are released here.
    2089             :          */
    2090        5108 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2091          68 :                 bs_release_md_page(bs, blob->clean.pages[i]);
    2092             :         }
    2093             : 
                               /* No active pages left: also release the blob's first page, whose
                                * number is derived from the blob id. */
    2094        5040 :         if (blob->active.num_pages == 0) {
    2095             :                 uint32_t page_num;
    2096             : 
    2097        1484 :                 page_num = bs_blobid_to_page(blob->id);
    2098        1484 :                 bs_release_md_page(bs, page_num);
    2099             :         }
    2100             : 
    2101        5040 :         spdk_spin_unlock(&bs->used_lock);
    2102             : 
    2103             :         /* Move on to clearing clusters */
    2104        5040 :         blob_persist_clear_clusters(seq, ctx);
    2105             : }
    2106             : 
                       /*
                        * Persist sequence step that zeroes on-disk metadata pages the blob no
                        * longer uses.
                        *
                        * seq     - sequence this persist runs on; a batch is created from it.
                        * cb_arg  - the struct spdk_blob_persist_ctx for this persist.
                        * bserrno - status of the preceding step; non-zero ends the persist through
                        *           blob_persist_complete() without issuing any I/O.
                        *
                        * Every page in blob->clean.pages[] except index 0 is zeroed; the
                        * blobid-derived first page is zeroed only when blob->active.num_pages == 0.
                        * All writes are added to one batch that is created with
                        * blob_persist_zero_pages_cpl as its callback.
                        */
    2107             : static void
    2108        5084 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2109             : {
    2110        5084 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2111        5084 :         struct spdk_blob                *blob = ctx->blob;
    2112        5084 :         struct spdk_blob_store          *bs = blob->bs;
    2113             :         uint64_t                        lba;
    2114             :         uint64_t                        lba_count;
    2115             :         spdk_bs_batch_t                 *batch;
    2116             :         size_t                          i;
    2117             : 
    2118        5084 :         if (bserrno != 0) {
    2119          40 :                 blob_persist_complete(seq, ctx, bserrno);
    2120          40 :                 return;
    2121             :         }
    2122             : 
    2123        5044 :         batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
    2124             : 
                               /* Every write below covers exactly one SPDK_BS_PAGE_SIZE page. */
    2125        5044 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    2126             : 
    2127             :         /* This loop starts at 1 because the first page is special and handled
    2128             :          * below. The pages (except the first) are never written in place,
    2129             :          * so any pages in the clean list must be zeroed.
    2130             :          */
    2131        5112 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2132          68 :                 lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
    2133             : 
    2134          68 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2135             :         }
    2136             : 
    2137             :         /* The first page will only be zeroed if this is a delete. */
    2138        5044 :         if (blob->active.num_pages == 0) {
    2139             :                 uint32_t page_num;
    2140             : 
    2141             :                 /* The first page in the metadata goes where the blobid indicates */
    2142        1488 :                 page_num = bs_blobid_to_page(blob->id);
    2143        1488 :                 lba = bs_md_page_to_lba(bs, page_num);
    2144             : 
    2145        1488 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2146             :         }
    2147             : 
    2148        5044 :         bs_batch_close(batch);
    2149             : }
    2150             : 
                       /*
                        * Persist sequence step that writes the root (first) metadata page.
                        *
                        * seq     - sequence this persist runs on.
                        * cb_arg  - the struct spdk_blob_persist_ctx for this persist.
                        * bserrno - status of the preceding step; non-zero ends the persist through
                        *           blob_persist_complete().
                        *
                        * Writes ctx->pages[0] to the LBA derived from blob->id and passes
                        * blob_persist_zero_pages as the write's callback. When the blob has no
                        * active pages nothing is written and blob_persist_zero_pages() is called
                        * directly.
                        */
    2151             : static void
    2152        3596 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2153             : {
    2154        3596 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2155        3596 :         struct spdk_blob                *blob = ctx->blob;
    2156        3596 :         struct spdk_blob_store          *bs = blob->bs;
    2157             :         uint64_t                        lba;
    2158             :         uint32_t                        lba_count;
    2159             :         struct spdk_blob_md_page        *page;
    2160             : 
    2161        3596 :         if (bserrno != 0) {
    2162           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2163           0 :                 return;
    2164             :         }
    2165             : 
    2166        3596 :         if (blob->active.num_pages == 0) {
    2167             :                 /* Move on to the next step */
    2168           0 :                 blob_persist_zero_pages(seq, ctx, 0);
    2169           0 :                 return;
    2170             :         }
    2171             : 
                               /* sizeof(*page) only inspects the type; page is assigned just below. */
    2172        3596 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2173             : 
    2174        3596 :         page = &ctx->pages[0];
    2175             :         /* The first page in the metadata goes where the blobid indicates */
    2176        3596 :         lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
    2177             : 
    2178        3596 :         bs_sequence_write_dev(seq, page, lba, lba_count,
    2179             :                               blob_persist_zero_pages, ctx);
    2180             : }
    2181             : 
                       /*
                        * Write every serialized metadata page except the root in one batch.
                        *
                        * seq - sequence this persist runs on; a batch is created from it.
                        * ctx - persist context; ctx->pages[] holds the serialized pages and
                        *       ctx->blob->active.pages[] the md page index each one is written to.
                        *
                        * The batch is created with blob_persist_write_page_root as its callback, so
                        * the root page is handled by that function rather than here.
                        */
    2182             : static void
    2183        3596 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2184             : {
    2185        3596 :         struct spdk_blob                *blob = ctx->blob;
    2186        3596 :         struct spdk_blob_store          *bs = blob->bs;
    2187             :         uint64_t                        lba;
    2188             :         uint32_t                        lba_count;
    2189             :         struct spdk_blob_md_page        *page;
    2190             :         spdk_bs_batch_t                 *batch;
    2191             :         size_t                          i;
    2192             : 
    2193             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2194             :          * at the end, but no changes ever occur in the middle of the list.
    2195             :          */
    2196             : 
                               /* sizeof(*page) only inspects the type; page is assigned inside the loop. */
    2197        3596 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2198             : 
    2199        3596 :         batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
    2200             : 
    2201             :         /* This starts at 1. The root page is not written until
    2202             :          * all of the others are finished
    2203             :          */
    2204        3684 :         for (i = 1; i < blob->active.num_pages; i++) {
    2205          88 :                 page = &ctx->pages[i];
                                       /* Sanity check: position in ctx->pages[] must match the page's own sequence number. */
    2206          88 :                 assert(page->sequence_num == i);
    2207             : 
    2208          88 :                 lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
    2209             : 
    2210          88 :                 bs_batch_write_dev(batch, page, lba, lba_count);
    2211             :         }
    2212             : 
    2213        3596 :         bs_batch_close(batch);
    2214        3596 : }
    2215             : 
                       /*
                        * Change the in-memory size of a blob to sz clusters.
                        *
                        * blob - blob to resize; blob_verify_md_op() is called on it first.
                        * sz   - requested number of clusters.
                        *
                        * Returns 0 on success (including when the size is already sz), -ENOSPC when
                        * the free-cluster / free-md-page check fails, and -ENOMEM when growing the
                        * cluster or extent-page arrays fails.
                        *
                        * On a size change the blob state is set to SPDK_BLOB_STATE_DIRTY. Growing
                        * enlarges blob->active.clusters (and, with an extent table, the extent-page
                        * array) and zero-fills the new tail; for blobs that are not thin provisioned
                        * bs_allocate_cluster() is called for every added cluster index. Shrinking
                        * only lowers the counters here; the comment below notes that the arrays are
                        * shrunk when persisting.
                        */
    2216             : static int
    2217        3568 : blob_resize(struct spdk_blob *blob, uint64_t sz)
    2218             : {
    2219             :         uint64_t        i;
    2220             :         uint64_t        *tmp;
    2221        3568 :         uint64_t        cluster;
    2222        3568 :         uint32_t        lfmd; /*  lowest free md page */
    2223             :         uint64_t        num_clusters;
    2224             :         uint32_t        *ep_tmp;
    2225        3568 :         uint64_t        new_num_ep = 0, current_num_ep = 0;
    2226             :         struct spdk_blob_store *bs;
    2227             :         int             rc;
    2228             : 
    2229        3568 :         bs = blob->bs;
    2230             : 
    2231        3568 :         blob_verify_md_op(blob);
    2232             : 
    2233        3568 :         if (blob->active.num_clusters == sz) {
    2234         456 :                 return 0;
    2235             :         }
    2236             : 
    2237        3112 :         if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2238             :                 /* If this blob was resized to be larger, then smaller, then
    2239             :                  * larger without syncing, then the cluster array already
    2240             :                  * contains spare assigned clusters we can use.
    2241             :                  */
    2242           0 :                 num_clusters = spdk_min(blob->active.cluster_array_size,
    2243             :                                         sz);
    2244             :         } else {
    2245        3112 :                 num_clusters = blob->active.num_clusters;
    2246             :         }
    2247             : 
                               /* Without an extent table both counts stay 0 (their initial values). */
    2248        3112 :         if (blob->use_extent_table) {
    2249             :                 /* Round up since every cluster beyond current Extent Table size,
    2250             :                  * requires new extent page. */
    2251        1578 :                 new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
    2252        1578 :                 current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
    2253             :         }
    2254             : 
    2255        3112 :         assert(!spdk_spin_held(&bs->used_lock));
    2256             : 
    2257             :         /* Check first that we have enough clusters and md pages before we start claiming them.
    2258             :          * bs->used_lock is held to ensure that clusters we think are free are still free when we go
    2259             :          * to claim them later in this function.
    2260             :          */
    2261        3112 :         if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
    2262        1302 :                 spdk_spin_lock(&bs->used_lock);
    2263        1302 :                 if ((sz - num_clusters) > bs->num_free_clusters) {
    2264           8 :                         rc = -ENOSPC;
    2265           8 :                         goto out;
    2266             :                 }
    2267        1294 :                 lfmd = 0;
                                       /* NOTE(review): lfmd is not advanced after a hit, so each iteration
                                        * appears to find the same free page again. As written this seems to
                                        * confirm only that one md page is free, not
                                        * (new_num_ep - current_num_ep) of them - confirm. The first pass in
                                        * blob_persist_generate_new_md() increments its index after each hit.
                                        */
    2268        1938 :                 for (i = current_num_ep; i < new_num_ep ; i++) {
    2269         644 :                         lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
    2270         644 :                         if (lfmd == UINT32_MAX) {
    2271             :                                 /* No more free md pages. Cannot satisfy the request */
    2272           0 :                                 rc = -ENOSPC;
    2273           0 :                                 goto out;
    2274             :                         }
    2275             :                 }
    2276             :         }
    2277             : 
    2278        3104 :         if (sz > num_clusters) {
    2279             :                 /* Expand the cluster array if necessary.
    2280             :                  * We only shrink the array when persisting.
    2281             :                  */
    2282        1702 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
    2283        1702 :                 if (sz > 0 && tmp == NULL) {
    2284           0 :                         rc = -ENOMEM;
    2285           0 :                         goto out;
    2286             :                 }
                                       /* Zero only the newly added tail, from the old array size up to sz. */
    2287        1702 :                 memset(tmp + blob->active.cluster_array_size, 0,
    2288        1702 :                        sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
    2289        1702 :                 blob->active.clusters = tmp;
    2290        1702 :                 blob->active.cluster_array_size = sz;
    2291             : 
    2292             :                 /* Expand the extents table, only if enough clusters were added */
    2293        1702 :                 if (new_num_ep > current_num_ep && blob->use_extent_table) {
    2294         840 :                         ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
    2295         840 :                         if (new_num_ep > 0 && ep_tmp == NULL) {
    2296           0 :                                 rc = -ENOMEM;
    2297           0 :                                 goto out;
    2298             :                         }
                                               /* Zero only the newly added extent-page slots. */
    2299         840 :                         memset(ep_tmp + blob->active.extent_pages_array_size, 0,
    2300         840 :                                sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
    2301         840 :                         blob->active.extent_pages = ep_tmp;
    2302         840 :                         blob->active.extent_pages_array_size = new_num_ep;
    2303             :                 }
    2304             :         }
    2305             : 
    2306        3104 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    2307             : 
    2308        3104 :         if (spdk_blob_is_thin_provisioned(blob) == false) {
    2309        2428 :                 cluster = 0;
    2310        2428 :                 lfmd = 0;
                                       /* The loop body runs only when growing (num_clusters < sz).
                                        * NOTE(review): any status reported by bs_allocate_cluster() is not
                                        * inspected here - confirm it cannot fail after the checks above.
                                        */
    2311        9832 :                 for (i = num_clusters; i < sz; i++) {
    2312        7404 :                         bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
    2313             :                         /* Do not increment lfmd here.  lfmd will get updated
    2314             :                          * to the md_page allocated (if any) when a new extent
    2315             :                          * page is needed.  Just pass that value again,
    2316             :                          * bs_allocate_cluster will just start at that index
    2317             :                          * to find the next free md_page when needed.
    2318             :                          */
    2319             :                 }
    2320             :         }
    2321             : 
    2322             :         /* If we are shrinking the blob, we must adjust num_allocated_clusters */
    2323     1072151 :         for (i = sz; i < num_clusters; i++) {
    2324     1069047 :                 if (blob->active.clusters[i] != 0) {
    2325        2343 :                         blob->active.num_allocated_clusters--;
    2326             :                 }
    2327             :         }
    2328             : 
    2329        3104 :         blob->active.num_clusters = sz;
    2330        3104 :         blob->active.num_extent_pages = new_num_ep;
    2331             : 
    2332        3104 :         rc = 0;
    2333        3112 : out:
                               /* used_lock is taken only on the non-thin grow path above, so it is
                                * released only if it is actually held.
                                */
    2334        3112 :         if (spdk_spin_held(&bs->used_lock)) {
    2335        1302 :                 spdk_spin_unlock(&bs->used_lock);
    2336             :         }
    2337             : 
    2338        3112 :         return rc;
    2339             : }
    2340             : 
                       /*
                        * Serialize the blob's metadata, assign it to md pages and start writing it.
                        *
                        * ctx - persist context; ctx->pages receives the serialized pages and
                        *       ctx->seq is the sequence used for the following writes.
                        *
                        * Steps: blob_serialize() fills ctx->pages and blob->active.num_pages; the
                        * blob->active.pages[] index array is resized to match; under bs->used_lock
                        * free md pages are first counted and then claimed for every page after the
                        * first, linking each page to the next through its `next` field and computing
                        * its crc; finally the blob is marked SPDK_BLOB_STATE_CLEAN and
                        * blob_persist_write_page_chain() is called.
                        *
                        * Any failure ends the persist through blob_persist_complete() with the
                        * blob_serialize() error or -ENOMEM.
                        */
    2341             : static void
    2342        3596 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
    2343             : {
    2344        3596 :         spdk_bs_sequence_t *seq = ctx->seq;
    2345        3596 :         struct spdk_blob *blob = ctx->blob;
    2346        3596 :         struct spdk_blob_store *bs = blob->bs;
    2347             :         uint64_t i;
    2348             :         uint32_t page_num;
    2349             :         void *tmp;
    2350             :         int rc;
    2351             : 
    2352             :         /* Generate the new metadata */
    2353        3596 :         rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
    2354        3596 :         if (rc < 0) {
    2355           0 :                 blob_persist_complete(seq, ctx, rc);
    2356           0 :                 return;
    2357             :         }
    2358             : 
    2359        3596 :         assert(blob->active.num_pages >= 1);
    2360             : 
    2361             :         /* Resize the cache of page indices */
    2362        3596 :         tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
    2363        3596 :         if (!tmp) {
    2364           0 :                 blob_persist_complete(seq, ctx, -ENOMEM);
    2365           0 :                 return;
    2366             :         }
    2367        3596 :         blob->active.pages = tmp;
    2368             : 
    2369             :         /* Assign this metadata to pages. This requires two passes - one to verify that there are
    2370             :          * enough pages and a second to actually claim them. The used_lock is held across
    2371             :          * both passes to ensure things don't change in the middle.
    2372             :          */
    2373        3596 :         spdk_spin_lock(&bs->used_lock);
    2374        3596 :         page_num = 0;
    2375             :         /* Note that this loop starts at one. The first page location is fixed by the blobid. */
    2376        3684 :         for (i = 1; i < blob->active.num_pages; i++) {
    2377          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2378          88 :                 if (page_num == UINT32_MAX) {
    2379           0 :                         spdk_spin_unlock(&bs->used_lock);
    2380           0 :                         blob_persist_complete(seq, ctx, -ENOMEM);
    2381           0 :                         return;
    2382             :                 }
                                       /* Nothing is claimed in this pass, so step past the hit to count distinct pages. */
    2383          88 :                 page_num++;
    2384             :         }
    2385             : 
                               /* Second pass: repeat the same search from the start and claim each page found. */
    2386        3596 :         page_num = 0;
    2387        3596 :         blob->active.pages[0] = bs_blobid_to_page(blob->id);
    2388        3684 :         for (i = 1; i < blob->active.num_pages; i++) {
    2389          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2390          88 :                 ctx->pages[i - 1].next = page_num;
    2391             :                 /* Now that previous metadata page is complete, calculate the crc for it. */
    2392          88 :                 ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2393          88 :                 blob->active.pages[i] = page_num;
    2394          88 :                 bs_claim_md_page(bs, page_num);
    2395          88 :                 SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
    2396             :                               blob->id);
    2397          88 :                 page_num++;
    2398             :         }
    2399        3596 :         spdk_spin_unlock(&bs->used_lock);
                               /* After the loop i == blob->active.num_pages, so pages[i - 1] is the last
                                * page; the loop above only computed the crc of the pages before it.
                                */
    2400        3596 :         ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2401             :         /* Start writing the metadata from last page to first */
    2402        3596 :         blob->state = SPDK_BLOB_STATE_CLEAN;
    2403        3596 :         blob_persist_write_page_chain(seq, ctx);
    2404             : }
    2405             : 
                       /*
                        * Persist sequence step that writes extent pages, one per invocation.
                        *
                        * seq     - sequence this persist runs on.
                        * cb_arg  - the struct spdk_blob_persist_ctx for this persist.
                        * bserrno - status of the previous step or extent-page write; non-zero ends
                        *           the persist through blob_persist_complete().
                        *
                        * Starting at ctx->next_extent_page, the first slot in
                        * blob->active.extent_pages[] with a non-zero page id is serialized and
                        * written, with this same function passed as the write's callback, so each
                        * completion resumes the scan at the following slot. When no slot is left,
                        * blob_persist_generate_new_md() is called.
                        */
    2406             : static void
    2407        2354 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2408             : {
    2409        2354 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2410        2354 :         struct spdk_blob                *blob = ctx->blob;
    2411             :         size_t                          i;
    2412             :         uint32_t                        extent_page_id;
    2413        2354 :         uint32_t                        page_count = 0;
    2414             :         int                             rc;
    2415             : 
                               /* Release the buffer left over from a previous invocation, if any. */
    2416        2354 :         if (ctx->extent_page != NULL) {
    2417         666 :                 spdk_free(ctx->extent_page);
    2418         666 :                 ctx->extent_page = NULL;
    2419             :         }
    2420             : 
    2421        2354 :         if (bserrno != 0) {
    2422           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2423           0 :                 return;
    2424             :         }
    2425             : 
    2426             :         /* Only write out Extent Pages when blob was resized. */
    2427        4608 :         for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
    2428        2920 :                 extent_page_id = blob->active.extent_pages[i];
    2429        2920 :                 if (extent_page_id == 0) {
    2430             :                         /* No Extent Page to persist */
    2431        2254 :                         assert(spdk_blob_is_thin_provisioned(blob));
    2432        2254 :                         continue;
    2433             :                 }
    2434         666 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
                                       /* Remember where the next invocation of this callback resumes. */
    2435         666 :                 ctx->next_extent_page = i + 1;
                                       /* Presumably allocates the page buffer into ctx->extent_page (it is
                                        * released with spdk_free() above) - confirm against
                                        * blob_serialize_add_page().
                                        */
    2436         666 :                 rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
    2437         666 :                 if (rc < 0) {
    2438           0 :                         blob_persist_complete(seq, ctx, rc);
    2439           0 :                         return;
    2440             :                 }
    2441             : 
    2442         666 :                 blob->state = SPDK_BLOB_STATE_DIRTY;
    2443         666 :                 blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
    2444             : 
    2445         666 :                 ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
    2446             : 
    2447         666 :                 bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
    2448         666 :                                       bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    2449             :                                       blob_persist_write_extent_pages, ctx);
    2450         666 :                 return;
    2451             :         }
    2452             : 
    2453        1688 :         blob_persist_generate_new_md(ctx);
    2454             : }
    2455             : 
                       /*
                        * First step of a blob persist; passed to bs_mark_dirty() by blob_persist().
                        *
                        * seq     - sequence this persist runs on.
                        * cb_arg  - the struct spdk_blob_persist_ctx for this persist.
                        * bserrno - status of the preceding step; non-zero ends the persist through
                        *           blob_persist_complete().
                        *
                        * Dispatches on what changed:
                        *  - no active pages (delete): mark the blob clean and go to
                        *    blob_persist_zero_pages();
                        *  - blob grew or shrank: choose the first extent page to rewrite and go to
                        *    blob_persist_write_extent_pages();
                        *  - otherwise: go straight to blob_persist_generate_new_md().
                        */
    2456             : static void
    2457        5092 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2458             : {
    2459        5092 :         struct spdk_blob_persist_ctx *ctx = cb_arg;
    2460        5092 :         struct spdk_blob *blob = ctx->blob;
    2461             : 
    2462        5092 :         if (bserrno != 0) {
    2463           8 :                 blob_persist_complete(seq, ctx, bserrno);
    2464           8 :                 return;
    2465             :         }
    2466             : 
    2467        5084 :         if (blob->active.num_pages == 0) {
    2468             :                 /* This is the signal that the blob should be deleted.
    2469             :                  * Immediately jump to the clean up routine. */
    2470        1488 :                 assert(blob->clean.num_pages > 0);
    2471        1488 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
    2472        1488 :                 blob_persist_zero_pages(seq, ctx, 0);
    2473        1488 :                 return;
    2474             : 
    2475             :         }
    2476             : 
                               /* In both resize branches spdk_max(1, n) - 1 evaluates to n - 1 for
                                * n >= 1 and to 0 for n == 0, i.e. the index of the last extent page
                                * counted by n, clamped so it never goes below 0.
                                */
    2477        3596 :         if (blob->clean.num_clusters < blob->active.num_clusters) {
    2478             :                 /* Blob was resized up */
    2479        1674 :                 assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
    2480        1674 :                 ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
    2481        1922 :         } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2482             :                 /* Blob was resized down */
    2483          14 :                 assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
    2484          14 :                 ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
    2485             :         } else {
    2486             :                 /* No change in size occurred */
    2487        1908 :                 blob_persist_generate_new_md(ctx);
    2488        1908 :                 return;
    2489             :         }
    2490             : 
    2491        1688 :         blob_persist_write_extent_pages(seq, ctx, 0);
    2492             : }
    2493             : 
                       /*
                        * Context carried through the bs_mark_dirty() read/modify/write of the super
                        * block.
                        *
                        * bs     - blobstore being marked dirty; its `clean` flag is cleared by
                        *          bs_mark_dirty_write_cpl() on success.
                        * super  - buffer for the super block, allocated with spdk_zmalloc() in
                        *          bs_mark_dirty() and released with spdk_free() in
                        *          bs_mark_dirty_write_cpl().
                        * cb_fn  - caller's completion callback, invoked by bs_mark_dirty_write_cpl().
                        * cb_arg - argument passed to cb_fn.
                        */
    2494             : struct spdk_bs_mark_dirty {
    2495             :         struct spdk_blob_store          *bs;
    2496             :         struct spdk_bs_super_block      *super;
    2497             :         spdk_bs_sequence_cpl            cb_fn;
    2498             :         void                            *cb_arg;
    2499             : };
    2500             : 
                       /*
                        * Final step of bs_mark_dirty(): record the result and hand control back to
                        * the caller.
                        *
                        * seq     - sequence the operation ran on; forwarded to the caller's callback.
                        * cb_arg  - the struct spdk_bs_mark_dirty context.
                        * bserrno - result of the preceding step. Only when it is 0 is the in-memory
                        *           bs->clean flag cleared.
                        *
                        * The caller's cb_fn is invoked with bserrno, after which the super block
                        * buffer and the context are freed.
                        */
    2501             : static void
    2502         158 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2503             : {
    2504         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2505             : 
    2506         158 :         if (bserrno == 0) {
    2507         150 :                 ctx->bs->clean = 0;
    2508             :         }
    2509             : 
    2510         158 :         ctx->cb_fn(seq, ctx->cb_arg, bserrno);
    2511             : 
    2512         158 :         spdk_free(ctx->super);
    2513         158 :         free(ctx);
    2514         158 : }
    2515             : 
                       /* Forward declaration so that bs_mark_dirty_write() below can call it. */
    2516             : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2517             :                            struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    2518             : 
    2519             : 
                       /*
                        * Callback passed to the super block read issued by bs_mark_dirty().
                        *
                        * seq     - sequence the operation runs on.
                        * cb_arg  - the struct spdk_bs_mark_dirty context; ctx->super is the buffer
                        *           that was handed to the read.
                        * bserrno - status of the read; non-zero is forwarded to
                        *           bs_mark_dirty_write_cpl().
                        *
                        * Checks the super block with bs_super_validate(), clears its `clean` field,
                        * fills in `size` from the device geometry when it is 0, and writes it back
                        * through bs_write_super() with bs_mark_dirty_write_cpl as the callback.
                        */
    2520             : static void
    2521         158 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2522             : {
    2523         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2524             :         int rc;
    2525             : 
    2526         158 :         if (bserrno != 0) {
    2527           4 :                 bs_mark_dirty_write_cpl(seq, ctx, bserrno);
    2528           4 :                 return;
    2529             :         }
    2530             : 
    2531         154 :         rc = bs_super_validate(ctx->super, ctx->bs);
    2532         154 :         if (rc != 0) {
    2533           0 :                 bs_mark_dirty_write_cpl(seq, ctx, rc);
    2534           0 :                 return;
    2535             :         }
    2536             : 
    2537         154 :         ctx->super->clean = 0;
                               /* A size of 0 is replaced by the device size in bytes (blockcnt * blocklen). */
    2538         154 :         if (ctx->super->size == 0) {
    2539           4 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    2540             :         }
    2541             : 
    2542         154 :         bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
    2543             : }
    2544             : 
                       /*
                        * Make sure the blobstore is marked dirty, then invoke cb_fn.
                        *
                        * seq    - sequence used for the super block read.
                        * bs     - blobstore to mark dirty.
                        * cb_fn  - called as cb_fn(seq, cb_arg, status) when done.
                        * cb_arg - argument passed to cb_fn.
                        *
                        * If bs->clean is already 0, cb_fn is called immediately with status 0 and no
                        * I/O is issued. Otherwise a context and a super block buffer are allocated
                        * and the super block is read from page 0; bs_mark_dirty_write is passed as
                        * the read's callback. If either allocation fails, cb_fn is called with
                        * -ENOMEM.
                        */
    2545             : static void
    2546        5526 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2547             :               spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2548             : {
    2549             :         struct spdk_bs_mark_dirty *ctx;
    2550             : 
    2551             :         /* Blobstore is already marked dirty */
    2552        5526 :         if (bs->clean == 0) {
    2553        5368 :                 cb_fn(seq, cb_arg, 0);
    2554        5368 :                 return;
    2555             :         }
    2556             : 
    2557         158 :         ctx = calloc(1, sizeof(*ctx));
    2558         158 :         if (!ctx) {
    2559           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2560           0 :                 return;
    2561             :         }
    2562         158 :         ctx->bs = bs;
    2563         158 :         ctx->cb_fn = cb_fn;
    2564         158 :         ctx->cb_arg = cb_arg;
    2565             : 
                               /* Buffer is requested with 0x1000 alignment and the SPDK_MALLOC_DMA flag. */
    2566         158 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    2567             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2568         158 :         if (!ctx->super) {
    2569           0 :                 free(ctx);
    2570           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2571           0 :                 return;
    2572             :         }
    2573             : 
    2574         158 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    2575         158 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    2576             :                              bs_mark_dirty_write, ctx);
    2577             : }
    2578             : 
    2579             : /* Write a blob to disk */
                       /*
                        * seq    - sequence the persist runs on.
                        * blob   - blob to persist; blob_verify_md_op() is called on it first.
                        * cb_fn  - completion callback; called as cb_fn(seq, cb_arg, status) directly
                        *          from this function on the early-out and allocation-failure paths.
                        * cb_arg - argument passed to cb_fn.
                        *
                        * If the blob is SPDK_BLOB_STATE_CLEAN and no persist is in flight, cb_fn is
                        * called immediately with 0. Otherwise a persist context is allocated; it is
                        * either parked on blob->pending_persists (when another persist is already
                        * on blob->persists_to_complete) or placed on blob->persists_to_complete and
                        * started through bs_mark_dirty() with blob_persist_start as the next step.
                        */
    2580             : static void
    2581        9104 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    2582             :              spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2583             : {
    2584             :         struct spdk_blob_persist_ctx *ctx;
    2585             : 
    2586        9104 :         blob_verify_md_op(blob);
    2587             : 
    2588        9104 :         if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
    2589        4012 :                 cb_fn(seq, cb_arg, 0);
    2590        4012 :                 return;
    2591             :         }
    2592             : 
    2593        5092 :         ctx = calloc(1, sizeof(*ctx));
    2594        5092 :         if (!ctx) {
    2595           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2596           0 :                 return;
    2597             :         }
    2598        5092 :         ctx->blob = blob;
    2599        5092 :         ctx->seq = seq;
    2600        5092 :         ctx->cb_fn = cb_fn;
    2601        5092 :         ctx->cb_arg = cb_arg;
    2602             : 
    2603             :         /* Multiple blob persists can affect one another, via blob->state or
    2604             :          * blob mutable data changes. To prevent it, queue up the persists. */
    2605        5092 :         if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
                                       /* Nothing is started here for a parked context; presumably the
                                        * completion path of the running persist picks it up - confirm in
                                        * blob_persist_complete().
                                        */
    2606          23 :                 TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
    2607          23 :                 return;
    2608             :         }
    2609        5069 :         TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
    2610             : 
    2611        5069 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
    2612             : }
    2613             : 
                       /*
                        * Context used as cb_arg by blob_allocate_and_copy_cluster_cpl(),
                        * blob_insert_cluster_revert() and blob_insert_cluster_clear_cpl().
                        *
                        * Field usage visible in those functions:
                        * blob            - blob the cluster belongs to; its bs->used_lock guards the
                        *                   revert.
                        * buf             - buffer released with spdk_free() in
                        *                   blob_allocate_and_copy_cluster_cpl().
                        * new_cluster     - cluster released by blob_insert_cluster_revert().
                        * new_extent_page - md page released by blob_insert_cluster_revert() when it
                        *                   is non-zero.
                        * seq             - sequence of the operation; also cast to a
                        *                   struct spdk_bs_request_set to reach the channel.
                        * page, new_cluster_page - not used by the functions listed above.
                        */
    2614             : struct spdk_blob_copy_cluster_ctx {
    2615             :         struct spdk_blob *blob;
    2616             :         uint8_t *buf;
    2617             :         uint64_t page;
    2618             :         uint64_t new_cluster;
    2619             :         uint32_t new_extent_page;
    2620             :         spdk_bs_sequence_t *seq;
    2621             :         struct spdk_blob_md_page *new_cluster_page;
    2622             : };
    2623             : 
                       /*
                        * Context used as cb_arg by blob_free_cluster_cpl(), which finishes `seq`
                        * and then frees the context. The remaining fields are not used by that
                        * function.
                        */
    2624             : struct spdk_blob_free_cluster_ctx {
    2625             :         struct spdk_blob *blob;
    2626             :         uint64_t page;
    2627             :         struct spdk_blob_md_page *md_page;
    2628             :         uint64_t cluster_num;
    2629             :         uint32_t extent_page;
    2630             :         spdk_bs_sequence_t *seq;
    2631             : };
    2632             : 
                       /*
                        * Completion callback for an allocate-and-copy-cluster operation.
                        *
                        * cb_arg  - the struct spdk_blob_copy_cluster_ctx of the operation.
                        * bserrno - result of the operation.
                        *
                        * Takes every user op queued on the channel's need_cluster_alloc list and
                        * either executes it (bserrno == 0) or aborts it with bserrno, then frees the
                        * context's buffer and the context itself.
                        */
    2633             : static void
    2634         812 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
    2635             : {
    2636         812 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
                               /* The sequence is viewed as a request set only to reach its channel. */
    2637         812 :         struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
    2638         812 :         TAILQ_HEAD(, spdk_bs_request_set) requests;
    2639             :         spdk_bs_user_op_t *op;
    2640             : 
                               /* Move the queued ops to a local list before running them. Presumably
                                * this keeps ops queued while they run out of this loop - confirm.
                                */
    2641         812 :         TAILQ_INIT(&requests);
    2642         812 :         TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
    2643             : 
    2644        1624 :         while (!TAILQ_EMPTY(&requests)) {
    2645         812 :                 op = TAILQ_FIRST(&requests);
    2646         812 :                 TAILQ_REMOVE(&requests, op, link);
    2647         812 :                 if (bserrno == 0) {
    2648         812 :                         bs_user_op_execute(op);
    2649             :                 } else {
    2650           0 :                         bs_user_op_abort(op, bserrno);
    2651             :                 }
    2652             :         }
    2653             : 
    2654         812 :         spdk_free(ctx->buf);
    2655         812 :         free(ctx);
    2656         812 : }
    2657             : 
                       /*
                        * Completion callback passed to blob_free_cluster_on_md_thread() by
                        * spdk_free_cluster_unmap_complete().
                        *
                        * Finishes the sequence stored in the context with bserrno and then frees
                        * the context.
                        *
                        * cb_arg:  the struct spdk_blob_free_cluster_ctx of this operation.
                        * bserrno: status that is forwarded to the sequence.
                        */
    2658             : static void
    2659          60 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
    2660             : {
    2661          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
                               /* Read seq before ctx is freed below. */
    2662          60 :         spdk_bs_sequence_t *seq = ctx->seq;
    2663             : 
    2664          60 :         bs_sequence_finish(seq, bserrno);
    2665             : 
    2666          60 :         free(ctx);
    2667          60 : }
    2668             : 
                       /*
                        * Undo the allocation recorded in ctx: release ctx->new_cluster and, when one
                        * was recorded (new_extent_page != 0), the extent md page as well. Both
                        * releases happen while holding the blobstore's used_lock.
                        *
                        * The context itself is not freed and the sequence is not finished here.
                        */
    2669             : static void
    2670           4 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
    2671             : {
    2672           4 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    2673           4 :         bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
                               /* 0 is treated as "no extent page was recorded for this allocation". */
    2674           4 :         if (ctx->new_extent_page != 0) {
    2675           2 :                 bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
    2676             :         }
    2677           4 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    2678           4 : }
    2679             : 
                       /*
                        * Completion of the clear issued by blob_insert_cluster_clear(); also called
                        * directly by it with -ENOMEM when no batch could be opened.
                        *
                        * A failed clear is only logged. In every case the allocation is reverted and
                        * the sequence is finished with bserrno, i.e. with the status of the clear,
                        * not with the -EEXIST that triggered it.
                        *
                        * NOTE(review): the cluster is released even when clearing it failed, so it
                        * may still hold the copied data when it is handed out again - confirm this
                        * is acceptable.
                        *
                        * cb_arg:  the struct spdk_blob_copy_cluster_ctx of this allocation.
                        * bserrno: status of the clear operation, 0 on success.
                        */
    2680             : static void
    2681           4 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
    2682             : {
    2683           4 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2684             : 
    2685           4 :         if (bserrno) {
    2686           0 :                 SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
    2687             :         }
    2688             : 
    2689           4 :         blob_insert_cluster_revert(ctx);
    2690           4 :         bs_sequence_finish(ctx->seq, bserrno);
    2691           4 : }
    2692             : 
                       /*
                        * Clear the contents of ctx->new_cluster before it is given back.
                        *
                        * Opens a batch on the channel of ctx->seq, queues one clear request that
                        * starts at the LBA of ctx->new_cluster and closes the batch. The batch
                        * completes into blob_insert_cluster_clear_cpl(), which releases the cluster
                        * and finishes the sequence. If no batch can be opened that callback is
                        * invoked directly with -ENOMEM.
                        */
    2693             : static void
    2694           4 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
    2695             : {
    2696           4 :         struct spdk_bs_cpl cpl;
    2697             :         spdk_bs_batch_t *batch;
    2698           4 :         struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
    2699             : 
    2700             :         /*
    2701             :          * We allocated a cluster and we copied data to it. But now, we realized that we don't need
    2702             :          * this cluster and we want to release it. We must ensure that we clear the data on this
    2703             :          * cluster.
    2704             :          * The cluster may later be re-allocated by a thick-provisioned blob for example. When
    2705             :          * reading from this thick-provisioned blob before writing data, we should read zeroes.
    2706             :          */
    2707             : 
    2708           4 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2709           4 :         cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
    2710           4 :         cpl.u.blob_basic.cb_arg = ctx;
    2711             : 
    2712           4 :         batch = bs_batch_open(ch, &cpl, ctx->blob);
    2713           4 :         if (!batch) {
    2714           0 :                 blob_insert_cluster_clear_cpl(ctx, -ENOMEM);
    2715           0 :                 return;
    2716             :         }
    2717             : 
                               /* NOTE(review): bs_cluster_to_lba(bs, 1) is used as the length here -
                                * presumably the number of LBAs in one cluster; confirm. */
    2718           4 :         bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2719           4 :                            bs_cluster_to_lba(ctx->blob->bs, 1));
    2720           4 :         bs_batch_close(batch);
    2721             : }
    2722             : 
                       /*
                        * Completion callback passed to blob_insert_cluster_on_md_thread().
                        *
                        *  - bserrno == 0:       finish the sequence successfully.
                        *  - bserrno == -EEXIST: hand over to blob_insert_cluster_clear(); the clear
                        *                        completion releases the cluster and finishes the
                        *                        sequence, so nothing else is done here.
                        *  - any other error:    release the allocation and finish the sequence with
                        *                        that error.
                        *
                        * cb_arg:  the struct spdk_blob_copy_cluster_ctx of this allocation.
                        * bserrno: result of the metadata insert.
                        */
    2723             : static void
    2724         812 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
    2725             : {
    2726         812 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2727             : 
    2728         812 :         if (bserrno) {
    2729           4 :                 if (bserrno == -EEXIST) {
    2730             :                         /* The metadata insert failed because another thread
    2731             :                          * allocated the cluster first. Clear and free our cluster
    2732             :                          * but continue without error. */
    2733           4 :                         blob_insert_cluster_clear(ctx);
    2734           4 :                         return;
    2735             :                 }
    2736             : 
    2737           0 :                 blob_insert_cluster_revert(ctx);
    2738             :         }
    2739             : 
    2740         808 :         bs_sequence_finish(ctx->seq, bserrno);
    2741             : }
    2742             : 
                       /*
                        * Sequence callback that runs after the new cluster has been filled, either
                        * by the write issued in blob_write_copy() or by the device copy issued in
                        * blob_copy().
                        *
                        * On success the new cluster is inserted into the blob's metadata via
                        * blob_insert_cluster_on_md_thread(), which completes into
                        * blob_insert_cluster_cpl(). On failure the sequence is finished with the
                        * error.
                        *
                        * NOTE(review): the failure path does not call blob_insert_cluster_revert(),
                        * and blob_allocate_and_copy_cluster_cpl() does not release the cluster
                        * either, so ctx->new_cluster / ctx->new_extent_page look like they stay
                        * claimed after a failed write or copy - confirm.
                        *
                        * seq:     the sequence stored in ctx->seq.
                        * cb_arg:  the struct spdk_blob_copy_cluster_ctx of this allocation.
                        * bserrno: result of the write/copy, 0 on success.
                        */
    2743             : static void
    2744         408 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2745             : {
    2746         408 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2747             :         uint32_t cluster_number;
    2748             : 
    2749         408 :         if (bserrno) {
    2750             :                 /* The write failed, so jump to the final completion handler */
    2751           0 :                 bs_sequence_finish(seq, bserrno);
    2752           0 :                 return;
    2753             :         }
    2754             : 
                               /* ctx->page was set to the cluster's start page by
                                * bs_allocate_and_copy_cluster(). */
    2755         408 :         cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);
    2756             : 
    2757         408 :         blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2758             :                                          ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2759             : }
    2760             : 
                       /*
                        * Sequence callback that runs after the cluster has been read from the
                        * backing device into ctx->buf (see bs_allocate_and_copy_cluster()).
                        *
                        * On success the buffer is written to the newly allocated cluster and the
                        * sequence continues in blob_write_copy_cpl(). On failure the sequence is
                        * finished with the error.
                        *
                        * NOTE(review): as in blob_write_copy_cpl(), the failure path does not
                        * release ctx->new_cluster - confirm whether that is intended.
                        *
                        * seq:     the sequence stored in ctx->seq.
                        * cb_arg:  the struct spdk_blob_copy_cluster_ctx of this allocation.
                        * bserrno: result of the read, 0 on success.
                        */
    2761             : static void
    2762         280 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2763             : {
    2764         280 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2765             : 
    2766         280 :         if (bserrno != 0) {
    2767             :                 /* The read failed, so jump to the final completion handler */
    2768           0 :                 bs_sequence_finish(seq, bserrno);
    2769           0 :                 return;
    2770             :         }
    2771             : 
    2772             :         /* Write whole cluster */
    2773         280 :         bs_sequence_write_dev(seq, ctx->buf,
    2774         280 :                               bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2775         280 :                               bs_cluster_to_lba(ctx->blob->bs, 1),
    2776             :                               blob_write_copy_cpl, ctx);
    2777             : }
    2778             : 
                       /*
                        * Decide whether the cluster starting at cluster_start_page can be filled
                        * with a device copy instead of a read followed by a write.
                        *
                        * Returns true only if the blob is not an esnap clone, the blobstore device
                        * provides a copy callback and the backing device's translate_lba() callback
                        * returns true for the cluster's first LBA.
                        *
                        * base_lba: passed to translate_lba(). That callback is only invoked when
                        *           the first two conditions hold, so *base_lba must not be used
                        *           when this function returns false.
                        */
    2779             : static bool
    2780         796 : blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *base_lba)
    2781             : {
    2782         796 :         uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
    2783             : 
    2784        1146 :         return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
    2785         350 :                blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
    2786             : }
    2787             : 
                       /*
                        * Fill the newly allocated cluster with a device copy.
                        *
                        * Queues a copy of one cluster's worth of LBAs from src_lba to the LBA of
                        * ctx->new_cluster on ctx->seq; the sequence continues in
                        * blob_write_copy_cpl().
                        *
                        * op:      not used by this function.
                        * src_lba: source LBA, as produced by blob_can_copy().
                        */
    2788             : static void
    2789         128 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
    2790             : {
    2791         128 :         struct spdk_blob *blob = ctx->blob;
    2792         128 :         uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
    2793             : 
    2794         128 :         bs_sequence_copy_dev(ctx->seq,
    2795         128 :                              bs_cluster_to_lba(blob->bs, ctx->new_cluster),
    2796             :                              src_lba,
    2797             :                              lba_count,
    2798             :                              blob_write_copy_cpl, ctx);
    2799         128 : }
    2800             : 
                       /*
                        * Allocate a cluster for the part of the blob that contains io_unit, fill it
                        * from the backing device when required and insert it into the blob's
                        * metadata. 'op' is parked on the channel's need_cluster_alloc list and is
                        * executed (or aborted) by blob_allocate_and_copy_cluster_cpl() once the
                        * sequence finishes.
                        *
                        * Flow:
                        *  1. If the channel already has ops on need_cluster_alloc, only queue 'op'.
                        *  2. Allocate the context and, when the blob has a parent, the backing data
                        *     is not all zeroes and a device copy is not possible, a DMA buffer of
                        *     one cluster.
                        *  3. Allocate the cluster under used_lock and start a sequence whose
                        *     completion is blob_allocate_and_copy_cluster_cpl().
                        *  4. Either copy (blob_copy), read then write (blob_write_copy), or go
                        *     straight to the metadata insert when there is nothing to copy.
                        *
                        * Every failure before the op is queued aborts 'op' (-ENOMEM, or the error
                        * returned by bs_allocate_cluster()) after freeing what was allocated.
                        *
                        * blob:    blob that needs the cluster.
                        * _ch:     I/O channel the op was submitted on.
                        * io_unit: io_unit offset inside the blob that triggered the allocation.
                        * op:      user operation waiting for the cluster.
                        */
    2801             : static void
    2802         812 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
    2803             :                              struct spdk_io_channel *_ch,
    2804             :                              uint64_t io_unit, spdk_bs_user_op_t *op)
    2805             : {
    2806         812 :         struct spdk_bs_cpl cpl;
    2807             :         struct spdk_bs_channel *ch;
    2808             :         struct spdk_blob_copy_cluster_ctx *ctx;
    2809             :         uint32_t cluster_start_page;
    2810             :         uint32_t cluster_number;
    2811             :         bool is_zeroes;
    2812             :         bool can_copy;
    2813             :         bool is_valid_range;
    2814         812 :         uint64_t copy_src_lba;
    2815             :         int rc;
    2816             : 
    2817         812 :         ch = spdk_io_channel_get_ctx(_ch);
    2818             : 
    2819         812 :         if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
    2820             :                 /* There are already operations pending. Queue this user op
    2821             :                  * and return because it will be re-executed when the outstanding
    2822             :                  * cluster allocation completes. */
    2823           0 :                 TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2824           0 :                 return;
    2825             :         }
    2826             : 
    2827             :         /* Round the io_unit offset down to the first page in the cluster */
    2828         812 :         cluster_start_page = bs_io_unit_to_cluster_start(blob, io_unit);
    2829             : 
    2830             :         /* Calculate which index in the metadata cluster array the corresponding
    2831             :          * cluster is supposed to be at. */
    2832         812 :         cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
    2833             : 
                               /* calloc() leaves buf NULL and new_extent_page 0 until they are set. */
    2834         812 :         ctx = calloc(1, sizeof(*ctx));
    2835         812 :         if (!ctx) {
    2836           0 :                 bs_user_op_abort(op, -ENOMEM);
    2837           0 :                 return;
    2838             :         }
    2839             : 
    2840         812 :         assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
    2841             : 
    2842         812 :         ctx->blob = blob;
    2843         812 :         ctx->page = cluster_start_page;
                               /* The md page buffer belongs to the channel and is zeroed for reuse. */
    2844         812 :         ctx->new_cluster_page = ch->new_cluster_page;
    2845         812 :         memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
    2846             : 
    2847             :         /* Check if the cluster that we intend to do CoW for is valid for
    2848             :          * the backing dev. For zeroes backing dev, it'll be always valid.
    2849             :          * For other backing dev e.g. a snapshot, it could be invalid if
    2850             :          * the blob has been resized after snapshot was taken. */
    2851         812 :         is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
    2852             :                          bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2853         812 :                          bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2854             : 
                               /* copy_src_lba is only meaningful when can_copy ends up true. */
    2855         812 :         can_copy = is_valid_range && blob_can_copy(blob, cluster_start_page, &copy_src_lba);
    2856             : 
                               /* An invalid range yields is_zeroes == false and can_copy == false. */
    2857        1608 :         is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
    2858             :                         bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2859         796 :                         bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
                               /* A buffer is needed only for the read-then-write path taken below. */
    2860         812 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) {
    2861         280 :                 ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
    2862             :                                        NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2863         280 :                 if (!ctx->buf) {
    2864           0 :                         SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
    2865             :                                     blob->bs->cluster_sz);
    2866           0 :                         free(ctx);
    2867           0 :                         bs_user_op_abort(op, -ENOMEM);
    2868           0 :                         return;
    2869             :                 }
    2870             :         }
    2871             : 
    2872         812 :         spdk_spin_lock(&blob->bs->used_lock);
    2873         812 :         rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
    2874             :                                  false);
    2875         812 :         spdk_spin_unlock(&blob->bs->used_lock);
    2876         812 :         if (rc != 0) {
    2877           0 :                 spdk_free(ctx->buf);
    2878           0 :                 free(ctx);
    2879           0 :                 bs_user_op_abort(op, rc);
    2880           0 :                 return;
    2881             :         }
    2882             : 
    2883         812 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2884         812 :         cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
    2885         812 :         cpl.u.blob_basic.cb_arg = ctx;
    2886             : 
    2887         812 :         ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
    2888         812 :         if (!ctx->seq) {
                                       /* NOTE(review): only new_cluster is released on this path.
                                        * blob_insert_cluster_revert() also releases
                                        * ctx->new_extent_page when it is non-zero; if
                                        * bs_allocate_cluster() claimed an extent page above, it
                                        * looks like it stays claimed here - confirm. */
    2889           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    2890           0 :                 bs_release_cluster(blob->bs, ctx->new_cluster);
    2891           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    2892           0 :                 spdk_free(ctx->buf);
    2893           0 :                 free(ctx);
    2894           0 :                 bs_user_op_abort(op, -ENOMEM);
    2895           0 :                 return;
    2896             :         }
    2897             : 
    2898             :         /* Queue the user op to block other incoming operations */
    2899         812 :         TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2900             : 
    2901         812 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
    2902         408 :                 if (can_copy) {
    2903         128 :                         blob_copy(ctx, op, copy_src_lba);
    2904             :                 } else {
    2905             :                         /* Read cluster from backing device */
    2906         280 :                         bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
    2907             :                                                 bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2908         280 :                                                 bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
    2909             :                                                 blob_write_copy, ctx);
    2910             :                 }
    2911             : 
    2912             :         } else {
                                       /* Nothing to copy: insert the new cluster right away. */
    2913         404 :                 blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2914             :                                                  ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2915             :         }
    2916             : }
    2917             : 
                       /*
                        * Translate an io_unit range of a blob into an LBA and an LBA count.
                        *
                        * If the io_unit is allocated in the blob, *lba is the blobstore device LBA
                        * and *lba_count is 'length' unchanged; returns true.
                        * Otherwise both *lba and *lba_count are converted with
                        * bs_io_unit_to_back_dev_lba(), i.e. they refer to the backing device;
                        * returns false. The blob must have a backing device in that case
                        * (asserted).
                        *
                        * io_unit: first io_unit of the range.
                        * length:  length of the range in io_units.
                        */
    2918             : static inline bool
    2919       40206 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
    2920             :                                  uint64_t *lba, uint64_t *lba_count)
    2921             : {
    2922       40206 :         *lba_count = length;
    2923             : 
    2924       40206 :         if (!bs_io_unit_is_allocated(blob, io_unit)) {
    2925        2992 :                 assert(blob->back_bs_dev != NULL);
    2926        2992 :                 *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
    2927        2992 :                 *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
    2928        2992 :                 return false;
    2929             :         } else {
    2930       37214 :                 *lba = bs_blob_io_unit_to_lba(blob, io_unit);
    2931       37214 :                 return true;
    2932             :         }
    2933             : }
    2934             : 
                       /*
                        * State of a blob I/O that is split into several operations by
                        * blob_request_submit_op_split() / blob_request_submit_op_split_next().
                        *
                        * io_unit_offset, io_units_remaining and curr_payload describe the part of
                        * the request that has not been submitted yet. seq is finished when the
                        * whole request is done or a piece fails.
                        *
                        * in_submit_ctx is true while a piece is being submitted;
                        * completed_in_submit_ctx records that the piece completed before the submit
                        * call returned; done records that the request finished during a submit
                        * call, so the submitter must free the context.
                        */
    2935             : struct op_split_ctx {
    2936             :         struct spdk_blob *blob;
    2937             :         struct spdk_io_channel *channel;
    2938             :         uint64_t io_unit_offset;
    2939             :         uint64_t io_units_remaining;
    2940             :         void *curr_payload;
    2941             :         enum spdk_blob_op_type op_type;
    2942             :         spdk_bs_sequence_t *seq;
    2943             :         bool in_submit_ctx;
    2944             :         bool completed_in_submit_ctx;
    2945             :         bool done;
    2946             : };
    2947             : 
                       /*
                        * Submit the next piece of a split request; also used as the completion
                        * callback of every piece.
                        *
                        * Each piece covers at most the io_units up to the next cluster boundary.
                        * When a piece fails or nothing is left, the sequence is finished with
                        * bserrno and the context is freed - either here, or by the frame that is
                        * still submitting when the completion arrived inside the submit call
                        * (ctx->done).
                        *
                        * A piece that completes inside its own submit call must not recurse into a
                        * new submission: the nested invocation only sets completed_in_submit_ctx
                        * and returns, and the loop in the outer invocation submits the next piece.
                        *
                        * cb_arg:  the struct op_split_ctx of the request.
                        * bserrno: status of the piece that just completed (0 for the initial call).
                        */
    2948             : static void
    2949         774 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
    2950             : {
    2951         774 :         struct op_split_ctx     *ctx = cb_arg;
    2952         774 :         struct spdk_blob        *blob = ctx->blob;
    2953         774 :         struct spdk_io_channel  *ch = ctx->channel;
    2954         774 :         enum spdk_blob_op_type  op_type = ctx->op_type;
    2955             :         uint8_t                 *buf;
    2956             :         uint64_t                offset;
    2957             :         uint64_t                length;
    2958             :         uint64_t                op_length;
    2959             : 
    2960         774 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    2961         178 :                 bs_sequence_finish(ctx->seq, bserrno);
    2962         178 :                 if (ctx->in_submit_ctx) {
    2963             :                         /* Defer freeing of the ctx object, since it will be
    2964             :                          * accessed when this unwinds back to the submission
    2965             :                          * context.
    2966             :                          */
    2967          40 :                         ctx->done = true;
    2968             :                 } else {
    2969         138 :                         free(ctx);
    2970             :                 }
    2971         178 :                 return;
    2972             :         }
    2973             : 
    2974         596 :         if (ctx->in_submit_ctx) {
    2975             :                 /* If this split operation completed in the context
    2976             :                  * of its submission, mark the flag and return immediately
    2977             :                  * to avoid recursion.
    2978             :                  */
    2979          68 :                 ctx->completed_in_submit_ctx = true;
    2980          68 :                 return;
    2981             :         }
    2982             : 
    2983             :         while (true) {
    2984         596 :                 ctx->completed_in_submit_ctx = false;
    2985             : 
                                       /* Snapshot the current piece before the context is advanced. */
    2986         596 :                 offset = ctx->io_unit_offset;
    2987         596 :                 length = ctx->io_units_remaining;
    2988         596 :                 buf = ctx->curr_payload;
    2989         596 :                 op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
    2990             :                                      offset));
    2991             : 
    2992             :                 /* Update length and payload for next operation */
    2993         596 :                 ctx->io_units_remaining -= op_length;
    2994         596 :                 ctx->io_unit_offset += op_length;
                                       /* Only reads and writes carry a payload. curr_payload is a
                                        * void *, so this addition relies on the GNU C extension that
                                        * treats void * arithmetic as byte arithmetic. */
    2995         596 :                 if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
    2996         528 :                         ctx->curr_payload += op_length * blob->bs->io_unit_size;
    2997             :                 }
    2998             : 
    2999         596 :                 assert(!ctx->in_submit_ctx);
    3000         596 :                 ctx->in_submit_ctx = true;
    3001             : 
                                       /* NOTE(review): the switch has no default case; an op_type
                                        * outside the listed values would submit nothing and fall
                                        * through to the bookkeeping below - confirm it cannot occur. */
    3002         596 :                 switch (op_type) {
    3003         418 :                 case SPDK_BLOB_READ:
    3004         418 :                         spdk_blob_io_read(blob, ch, buf, offset, op_length,
    3005             :                                           blob_request_submit_op_split_next, ctx);
    3006         418 :                         break;
    3007         110 :                 case SPDK_BLOB_WRITE:
    3008         110 :                         spdk_blob_io_write(blob, ch, buf, offset, op_length,
    3009             :                                            blob_request_submit_op_split_next, ctx);
    3010         110 :                         break;
    3011          36 :                 case SPDK_BLOB_UNMAP:
    3012          36 :                         spdk_blob_io_unmap(blob, ch, offset, op_length,
    3013             :                                            blob_request_submit_op_split_next, ctx);
    3014          36 :                         break;
    3015          32 :                 case SPDK_BLOB_WRITE_ZEROES:
    3016          32 :                         spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
    3017             :                                                   blob_request_submit_op_split_next, ctx);
    3018          32 :                         break;
    3019           0 :                 case SPDK_BLOB_READV:
    3020             :                 case SPDK_BLOB_WRITEV:
                                               /* Vectored ops are rejected here with -EINVAL. */
    3021           0 :                         SPDK_ERRLOG("readv/write not valid\n");
    3022           0 :                         bs_sequence_finish(ctx->seq, -EINVAL);
    3023           0 :                         free(ctx);
    3024           0 :                         return;
    3025             :                 }
    3026             : 
    3027             : #ifndef __clang_analyzer__
    3028             :                 /* scan-build reports a false positive around accessing the ctx here. It
    3029             :                  * forms a path that recursively calls this function, but then says
    3030             :                  * "assuming ctx->in_submit_ctx is false", when that isn't possible.
    3031             :                  * This path does free(ctx), returns to here, and reports a use-after-free
    3032             :                  * bug.  Wrapping this bit of code so that scan-build doesn't see it
    3033             :                  * works around the scan-build bug.
    3034             :                  */
    3035         596 :                 assert(ctx->in_submit_ctx);
    3036         596 :                 ctx->in_submit_ctx = false;
    3037             : 
    3038             :                 /* If the operation completed immediately, loop back and submit the
    3039             :                  * next operation.  Otherwise we can return and the next split
    3040             :                  * operation will get submitted when this current operation is
    3041             :                  * later completed asynchronously.
    3042             :                  */
    3043         596 :                 if (ctx->completed_in_submit_ctx) {
    3044          68 :                         continue;
    3045         528 :                 } else if (ctx->done) {
                                               /* The request finished inside the submit call above,
                                                * which deferred the free to this frame. */
    3046          40 :                         free(ctx);
    3047             :                 }
    3048             : #endif
    3049         528 :                 break;
    3050             :         }
    3051             : }
    3052             : 
                       /*
                        * Start a blob I/O that has to be split into several operations.
                        *
                        * Allocates the split context, starts a sequence that completes into
                        * cb_fn/cb_arg and submits the first piece through
                        * blob_request_submit_op_split_next(). If the context or the sequence cannot
                        * be allocated, cb_fn is called directly with -ENOMEM.
                        *
                        * ch:      I/O channel to submit on.
                        * blob:    target blob, must not be NULL (asserted).
                        * payload: data buffer of the request; stored as the initial curr_payload.
                        * offset:  first io_unit of the request.
                        * length:  length of the request in io_units.
                        * cb_fn:   completion callback of the whole request.
                        * cb_arg:  argument passed to cb_fn.
                        * op_type: operation to perform on each piece.
                        */
    3053             : static void
    3054         178 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
    3055             :                              void *payload, uint64_t offset, uint64_t length,
    3056             :                              spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3057             : {
    3058             :         struct op_split_ctx *ctx;
    3059             :         spdk_bs_sequence_t *seq;
    3060         178 :         struct spdk_bs_cpl cpl;
    3061             : 
    3062         178 :         assert(blob != NULL);
    3063             : 
                               /* calloc() leaves in_submit_ctx, completed_in_submit_ctx and done
                                * false. */
    3064         178 :         ctx = calloc(1, sizeof(struct op_split_ctx));
    3065         178 :         if (ctx == NULL) {
    3066           0 :                 cb_fn(cb_arg, -ENOMEM);
    3067           0 :                 return;
    3068             :         }
    3069             : 
    3070         178 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3071         178 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3072         178 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3073             : 
    3074         178 :         seq = bs_sequence_start_blob(ch, &cpl, blob);
    3075         178 :         if (!seq) {
    3076           0 :                 free(ctx);
    3077           0 :                 cb_fn(cb_arg, -ENOMEM);
    3078           0 :                 return;
    3079             :         }
    3080             : 
    3081         178 :         ctx->blob = blob;
    3082         178 :         ctx->channel = ch;
    3083         178 :         ctx->curr_payload = payload;
    3084         178 :         ctx->io_unit_offset = offset;
    3085         178 :         ctx->io_units_remaining = length;
    3086         178 :         ctx->op_type = op_type;
    3087         178 :         ctx->seq = seq;
    3088             : 
                               /* Kick off the first piece; bserrno 0 means "no previous failure". */
    3089         178 :         blob_request_submit_op_split_next(ctx, 0);
    3090             : }
    3091             : 
                       /*
                        * Callback for the step that precedes releasing a cluster.
                        * NOTE(review): by its name this is the completion of the unmap of the
                        * cluster's data - confirm against the code that registers it.
                        *
                        * On failure the sequence is finished with the error and the context is
                        * freed. On success the cluster is released from the blob's metadata via
                        * blob_free_cluster_on_md_thread(), which completes into
                        * blob_free_cluster_cpl().
                        *
                        * cb_arg:  the struct spdk_blob_free_cluster_ctx of this operation.
                        * bserrno: status of the preceding step, 0 on success.
                        */
    3092             : static void
    3093          60 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
    3094             : {
    3095          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    3096             : 
    3097          60 :         if (bserrno) {
    3098           0 :                 bs_sequence_finish(ctx->seq, bserrno);
    3099           0 :                 free(ctx);
    3100           0 :                 return;
    3101             :         }
    3102             : 
    3103          60 :         blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
    3104             :                                        ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx);
    3105             : }
    3106             : 
    3107             : static void
    3108       37834 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
    3109             :                               void *payload, uint64_t offset, uint64_t length,
    3110             :                               spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3111             : {
    3112       37834 :         struct spdk_bs_cpl cpl;
    3113       37834 :         uint64_t lba;
    3114       37834 :         uint64_t lba_count;
    3115             :         bool is_allocated;
    3116             : 
    3117       37834 :         assert(blob != NULL);
    3118             : 
    3119       37834 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3120       37834 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3121       37834 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3122             : 
    3123       37834 :         if (blob->frozen_refcnt) {
    3124             :                 /* This blob I/O is frozen */
    3125             :                 spdk_bs_user_op_t *op;
    3126           4 :                 struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3127             : 
    3128           4 :                 op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3129           4 :                 if (!op) {
    3130           0 :                         cb_fn(cb_arg, -ENOMEM);
    3131           0 :                         return;
    3132             :                 }
    3133             : 
    3134           4 :                 TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3135             : 
    3136           4 :                 return;
    3137             :         }
    3138             : 
    3139       37830 :         is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3140             : 
    3141       37830 :         switch (op_type) {
    3142       16887 :         case SPDK_BLOB_READ: {
    3143             :                 spdk_bs_batch_t *batch;
    3144             : 
    3145       16887 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3146       16887 :                 if (!batch) {
    3147           0 :                         cb_fn(cb_arg, -ENOMEM);
    3148           0 :                         return;
    3149             :                 }
    3150             : 
    3151       16887 :                 if (is_allocated) {
    3152             :                         /* Read from the blob */
    3153       15799 :                         bs_batch_read_dev(batch, payload, lba, lba_count);
    3154             :                 } else {
    3155             :                         /* Read from the backing block device */
    3156        1088 :                         bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
    3157             :                 }
    3158             : 
    3159       16887 :                 bs_batch_close(batch);
    3160       16887 :                 break;
    3161             :         }
    3162       20851 :         case SPDK_BLOB_WRITE:
    3163             :         case SPDK_BLOB_WRITE_ZEROES: {
    3164       20851 :                 if (is_allocated) {
    3165             :                         /* Write to the blob */
    3166             :                         spdk_bs_batch_t *batch;
    3167             : 
    3168       20507 :                         if (lba_count == 0) {
    3169           0 :                                 cb_fn(cb_arg, 0);
    3170           0 :                                 return;
    3171             :                         }
    3172             : 
    3173       20507 :                         batch = bs_batch_open(_ch, &cpl, blob);
    3174       20507 :                         if (!batch) {
    3175           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3176           0 :                                 return;
    3177             :                         }
    3178             : 
    3179       20507 :                         if (op_type == SPDK_BLOB_WRITE) {
    3180       20475 :                                 bs_batch_write_dev(batch, payload, lba, lba_count);
    3181             :                         } else {
    3182          32 :                                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    3183             :                         }
    3184             : 
    3185       20507 :                         bs_batch_close(batch);
    3186             :                 } else {
    3187             :                         /* Queue this operation and allocate the cluster */
    3188             :                         spdk_bs_user_op_t *op;
    3189             : 
    3190         344 :                         op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3191         344 :                         if (!op) {
    3192           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3193           0 :                                 return;
    3194             :                         }
    3195             : 
    3196         344 :                         bs_allocate_and_copy_cluster(blob, _ch, offset, op);
    3197             :                 }
    3198       20851 :                 break;
    3199             :         }
    3200          92 :         case SPDK_BLOB_UNMAP: {
    3201          92 :                 struct spdk_blob_free_cluster_ctx *ctx = NULL;
    3202             :                 spdk_bs_batch_t *batch;
    3203             : 
    3204             :                 /* if aligned with cluster release cluster */
    3205         160 :                 if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
    3206          68 :                     bs_io_units_per_cluster(blob) == length) {
    3207          60 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3208             :                         uint32_t cluster_start_page;
    3209             :                         uint32_t cluster_number;
    3210             : 
    3211          60 :                         assert(offset % bs_io_units_per_cluster(blob) == 0);
    3212             : 
    3213             :                         /* Round the io_unit offset down to the first page in the cluster */
    3214          60 :                         cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
    3215             : 
    3216             :                         /* Calculate which index in the metadata cluster array the corresponding
    3217             :                          * cluster is supposed to be at. */
    3218          60 :                         cluster_number = bs_io_unit_to_cluster_number(blob, offset);
    3219             : 
    3220          60 :                         ctx = calloc(1, sizeof(*ctx));
    3221          60 :                         if (!ctx) {
    3222           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3223           0 :                                 return;
    3224             :                         }
    3225             :                         /* When freeing a cluster the flow should be (in order):
    3226             :                          * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
    3227             :                          * old data)
    3228             :                          * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
    3229             :                          * cluster), update and sync metadata freeing the cluster
    3230             :                          * 3. Once metadata update is done, complete the user unmap request
    3231             :                          */
    3232          60 :                         ctx->blob = blob;
    3233          60 :                         ctx->page = cluster_start_page;
    3234          60 :                         ctx->cluster_num = cluster_number;
    3235          60 :                         ctx->md_page = bs_channel->new_cluster_page;
    3236          60 :                         ctx->seq = bs_sequence_start_bs(_ch, &cpl);
    3237          60 :                         if (!ctx->seq) {
    3238           0 :                                 free(ctx);
    3239           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3240           0 :                                 return;
    3241             :                         }
    3242             : 
    3243          60 :                         if (blob->use_extent_table) {
    3244          30 :                                 ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
    3245             :                         }
    3246             : 
    3247          60 :                         cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
    3248          60 :                         cpl.u.blob_basic.cb_arg = ctx;
    3249             :                 }
    3250             : 
    3251          92 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3252          92 :                 if (!batch) {
    3253           0 :                         free(ctx);
    3254           0 :                         cb_fn(cb_arg, -ENOMEM);
    3255           0 :                         return;
    3256             :                 }
    3257             : 
    3258          92 :                 if (is_allocated) {
    3259          92 :                         bs_batch_unmap_dev(batch, lba, lba_count);
    3260             :                 }
    3261             : 
    3262          92 :                 bs_batch_close(batch);
    3263          92 :                 break;
    3264             :         }
    3265           0 :         case SPDK_BLOB_READV:
    3266             :         case SPDK_BLOB_WRITEV:
    3267           0 :                 SPDK_ERRLOG("readv/write not valid\n");
    3268           0 :                 cb_fn(cb_arg, -EINVAL);
    3269           0 :                 break;
    3270             :         }
    3271       37830 : }
    3272             : 
    3273             : static void
    3274       38524 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3275             :                        void *payload, uint64_t offset, uint64_t length,
    3276             :                        spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3277             : {
    3278       38524 :         assert(blob != NULL);
    3279             : 
    3280       38524 :         if (blob->data_ro && op_type != SPDK_BLOB_READ) {
    3281           4 :                 cb_fn(cb_arg, -EPERM);
    3282           4 :                 return;
    3283             :         }
    3284             : 
    3285       38520 :         if (length == 0) {
    3286         492 :                 cb_fn(cb_arg, 0);
    3287         492 :                 return;
    3288             :         }
    3289             : 
    3290       38028 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3291          24 :                 cb_fn(cb_arg, -EINVAL);
    3292          24 :                 return;
    3293             :         }
    3294       38004 :         if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
    3295       37826 :                 blob_request_submit_op_single(_channel, blob, payload, offset, length,
    3296             :                                               cb_fn, cb_arg, op_type);
    3297             :         } else {
    3298         178 :                 blob_request_submit_op_split(_channel, blob, payload, offset, length,
    3299             :                                              cb_fn, cb_arg, op_type);
    3300             :         }
    3301             : }
    3302             : 
    3303             : struct rw_iov_ctx {
    3304             :         struct spdk_blob *blob;
    3305             :         struct spdk_io_channel *channel;
    3306             :         spdk_blob_op_complete cb_fn;
    3307             :         void *cb_arg;
    3308             :         bool read;
    3309             :         int iovcnt;
    3310             :         struct iovec *orig_iov;
    3311             :         uint64_t io_unit_offset;
    3312             :         uint64_t io_units_remaining;
    3313             :         uint64_t io_units_done;
    3314             :         struct spdk_blob_ext_io_opts *ext_io_opts;
    3315             :         struct iovec iov[0];
    3316             : };
    3317             : 
    3318             : static void
    3319        2360 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    3320             : {
    3321        2360 :         assert(cb_arg == NULL);
    3322        2360 :         bs_sequence_finish(seq, bserrno);
    3323        2360 : }
    3324             : 
    3325             : static void
    3326         744 : rw_iov_split_next(void *cb_arg, int bserrno)
    3327             : {
    3328         744 :         struct rw_iov_ctx *ctx = cb_arg;
    3329         744 :         struct spdk_blob *blob = ctx->blob;
    3330             :         struct iovec *iov, *orig_iov;
    3331             :         int iovcnt;
    3332             :         size_t orig_iovoff;
    3333             :         uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
    3334             :         uint64_t byte_count;
    3335             : 
    3336         744 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    3337         204 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
    3338         204 :                 free(ctx);
    3339         204 :                 return;
    3340             :         }
    3341             : 
    3342         540 :         io_unit_offset = ctx->io_unit_offset;
    3343         540 :         io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
    3344         540 :         io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
    3345             :         /*
    3346             :          * Get index and offset into the original iov array for our current position in the I/O sequence.
    3347             :          *  byte_count will keep track of how many bytes remaining until orig_iov and orig_iovoff will
    3348             :          *  point to the current position in the I/O sequence.
    3349             :          */
    3350         540 :         byte_count = ctx->io_units_done * blob->bs->io_unit_size;
    3351         540 :         orig_iov = &ctx->orig_iov[0];
    3352         540 :         orig_iovoff = 0;
    3353        1148 :         while (byte_count > 0) {
    3354         608 :                 if (byte_count >= orig_iov->iov_len) {
    3355         352 :                         byte_count -= orig_iov->iov_len;
    3356         352 :                         orig_iov++;
    3357             :                 } else {
    3358         256 :                         orig_iovoff = byte_count;
    3359         256 :                         byte_count = 0;
    3360             :                 }
    3361             :         }
    3362             : 
    3363             :         /*
    3364             :          * Build an iov array for the next I/O in the sequence.  byte_count will keep track of how many
    3365             :          *  bytes of this next I/O remain to be accounted for in the new iov array.
    3366             :          */
    3367         540 :         byte_count = io_units_count * blob->bs->io_unit_size;
    3368         540 :         iov = &ctx->iov[0];
    3369         540 :         iovcnt = 0;
    3370        1380 :         while (byte_count > 0) {
    3371         840 :                 assert(iovcnt < ctx->iovcnt);
    3372         840 :                 iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
    3373         840 :                 iov->iov_base = orig_iov->iov_base + orig_iovoff;
    3374         840 :                 byte_count -= iov->iov_len;
    3375         840 :                 orig_iovoff = 0;
    3376         840 :                 orig_iov++;
    3377         840 :                 iov++;
    3378         840 :                 iovcnt++;
    3379             :         }
    3380             : 
    3381         540 :         ctx->io_unit_offset += io_units_count;
    3382         540 :         ctx->io_units_remaining -= io_units_count;
    3383         540 :         ctx->io_units_done += io_units_count;
    3384         540 :         iov = &ctx->iov[0];
    3385             : 
    3386         540 :         if (ctx->read) {
    3387         408 :                 spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3388             :                                        io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3389             :         } else {
    3390         132 :                 spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3391             :                                         io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3392             :         }
    3393             : }
    3394             : 
    3395             : static void
    3396        2588 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3397             :                            struct iovec *iov, int iovcnt,
    3398             :                            uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
    3399             :                            struct spdk_blob_ext_io_opts *ext_io_opts)
    3400             : {
    3401        2588 :         struct spdk_bs_cpl      cpl;
    3402             : 
    3403        2588 :         assert(blob != NULL);
    3404             : 
    3405        2588 :         if (!read && blob->data_ro) {
    3406           4 :                 cb_fn(cb_arg, -EPERM);
    3407           4 :                 return;
    3408             :         }
    3409             : 
    3410        2584 :         if (length == 0) {
    3411           0 :                 cb_fn(cb_arg, 0);
    3412           0 :                 return;
    3413             :         }
    3414             : 
    3415        2584 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3416           0 :                 cb_fn(cb_arg, -EINVAL);
    3417           0 :                 return;
    3418             :         }
    3419             : 
    3420             :         /*
    3421             :          * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
    3422             :          *  to split a request that spans a cluster boundary.  For I/O that do not span a cluster boundary,
    3423             :          *  there will be no noticeable difference compared to using a batch.  For I/O that do span a cluster
    3424             :          *  boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
    3425             :          *  to allocate a separate iov array and split the I/O such that none of the resulting
    3426             :          *  smaller I/O cross a cluster boundary.  These smaller I/O will be issued in sequence (not in parallel)
    3427             :          *  but since this case happens very infrequently, any performance impact will be negligible.
    3428             :          *
    3429             :          * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
    3430             :          *  for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
    3431             :          *  in a batch.  That would also require creating an intermediate spdk_bs_cpl that would get called
    3432             :          *  when the batch was completed, to allow for freeing the memory for the iov arrays.
    3433             :          */
    3434        2584 :         if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
    3435        2376 :                 uint64_t lba_count;
    3436        2376 :                 uint64_t lba;
    3437             :                 bool is_allocated;
    3438             : 
    3439        2376 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3440        2376 :                 cpl.u.blob_basic.cb_fn = cb_fn;
    3441        2376 :                 cpl.u.blob_basic.cb_arg = cb_arg;
    3442             : 
    3443        2376 :                 if (blob->frozen_refcnt) {
    3444             :                         /* This blob I/O is frozen */
    3445             :                         enum spdk_blob_op_type op_type;
    3446             :                         spdk_bs_user_op_t *op;
    3447           0 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
    3448             : 
    3449           0 :                         op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
    3450           0 :                         op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
    3451           0 :                         if (!op) {
    3452           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3453           0 :                                 return;
    3454             :                         }
    3455             : 
    3456           0 :                         TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3457             : 
    3458           0 :                         return;
    3459             :                 }
    3460             : 
    3461        2376 :                 is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3462             : 
    3463        2376 :                 if (read) {
    3464             :                         spdk_bs_sequence_t *seq;
    3465             : 
    3466        2084 :                         seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3467        2084 :                         if (!seq) {
    3468           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3469           0 :                                 return;
    3470             :                         }
    3471             : 
    3472        2084 :                         seq->ext_io_opts = ext_io_opts;
    3473             : 
    3474        2084 :                         if (is_allocated) {
    3475         540 :                                 bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3476             :                         } else {
    3477        1544 :                                 bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
    3478             :                                                          rw_iov_done, NULL);
    3479             :                         }
    3480             :                 } else {
    3481         292 :                         if (is_allocated) {
    3482             :                                 spdk_bs_sequence_t *seq;
    3483             : 
    3484         276 :                                 seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3485         276 :                                 if (!seq) {
    3486           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3487           0 :                                         return;
    3488             :                                 }
    3489             : 
    3490         276 :                                 seq->ext_io_opts = ext_io_opts;
    3491             : 
    3492         276 :                                 bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3493             :                         } else {
    3494             :                                 /* Queue this operation and allocate the cluster */
    3495             :                                 spdk_bs_user_op_t *op;
    3496             : 
    3497          16 :                                 op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
    3498             :                                                       length);
    3499          16 :                                 if (!op) {
    3500           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3501           0 :                                         return;
    3502             :                                 }
    3503             : 
    3504          16 :                                 op->ext_io_opts = ext_io_opts;
    3505             : 
    3506          16 :                                 bs_allocate_and_copy_cluster(blob, _channel, offset, op);
    3507             :                         }
    3508             :                 }
    3509             :         } else {
    3510             :                 struct rw_iov_ctx *ctx;
    3511             : 
    3512         208 :                 ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
    3513         208 :                 if (ctx == NULL) {
    3514           4 :                         cb_fn(cb_arg, -ENOMEM);
    3515           4 :                         return;
    3516             :                 }
    3517             : 
    3518         204 :                 ctx->blob = blob;
    3519         204 :                 ctx->channel = _channel;
    3520         204 :                 ctx->cb_fn = cb_fn;
    3521         204 :                 ctx->cb_arg = cb_arg;
    3522         204 :                 ctx->read = read;
    3523         204 :                 ctx->orig_iov = iov;
    3524         204 :                 ctx->iovcnt = iovcnt;
    3525         204 :                 ctx->io_unit_offset = offset;
    3526         204 :                 ctx->io_units_remaining = length;
    3527         204 :                 ctx->io_units_done = 0;
    3528         204 :                 ctx->ext_io_opts = ext_io_opts;
    3529             : 
    3530         204 :                 rw_iov_split_next(ctx, 0);
    3531             :         }
    3532             : }
    3533             : 
    3534             : static struct spdk_blob *
    3535        7709 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
    3536             : {
    3537        7709 :         struct spdk_blob find;
    3538             : 
    3539        7709 :         if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
    3540        6932 :                 return NULL;
    3541             :         }
    3542             : 
    3543         777 :         find.id = blobid;
    3544         777 :         return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
    3545             : }
    3546             : 
    3547             : static void
    3548        1798 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
    3549             :                                     struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
    3550             : {
    3551        1798 :         assert(blob != NULL);
    3552        1798 :         *snapshot_entry = NULL;
    3553        1798 :         *clone_entry = NULL;
    3554             : 
    3555        1798 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    3556        1518 :                 return;
    3557             :         }
    3558             : 
    3559         424 :         TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
    3560         372 :                 if ((*snapshot_entry)->id == blob->parent_id) {
    3561         228 :                         break;
    3562             :                 }
    3563             :         }
    3564             : 
    3565         280 :         if (*snapshot_entry != NULL) {
    3566         272 :                 TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
    3567         272 :                         if ((*clone_entry)->id == blob->id) {
    3568         228 :                                 break;
    3569             :                         }
    3570             :                 }
    3571             : 
    3572         228 :                 assert(*clone_entry != NULL);
    3573             :         }
    3574             : }
    3575             : 
    3576             : static int
    3577         796 : bs_channel_create(void *io_device, void *ctx_buf)
    3578             : {
    3579         796 :         struct spdk_blob_store          *bs = io_device;
    3580         796 :         struct spdk_bs_channel          *channel = ctx_buf;
    3581             :         struct spdk_bs_dev              *dev;
    3582         796 :         uint32_t                        max_ops = bs->max_channel_ops;
    3583             :         uint32_t                        i;
    3584             : 
    3585         796 :         dev = bs->dev;
    3586             : 
    3587         796 :         channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
    3588         796 :         if (!channel->req_mem) {
    3589           0 :                 return -1;
    3590             :         }
    3591             : 
    3592         796 :         TAILQ_INIT(&channel->reqs);
    3593             : 
    3594      408348 :         for (i = 0; i < max_ops; i++) {
    3595      407552 :                 TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
    3596             :         }
    3597             : 
    3598         796 :         channel->bs = bs;
    3599         796 :         channel->dev = dev;
    3600         796 :         channel->dev_channel = dev->create_channel(dev);
    3601             : 
    3602         796 :         if (!channel->dev_channel) {
    3603           0 :                 SPDK_ERRLOG("Failed to create device channel.\n");
    3604           0 :                 free(channel->req_mem);
    3605           0 :                 return -1;
    3606             :         }
    3607             : 
    3608         796 :         channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
    3609             :                                     SPDK_MALLOC_DMA);
    3610         796 :         if (!channel->new_cluster_page) {
    3611           0 :                 SPDK_ERRLOG("Failed to allocate new cluster page\n");
    3612           0 :                 free(channel->req_mem);
    3613           0 :                 channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3614           0 :                 return -1;
    3615             :         }
    3616             : 
    3617         796 :         TAILQ_INIT(&channel->need_cluster_alloc);
    3618         796 :         TAILQ_INIT(&channel->queued_io);
    3619         796 :         RB_INIT(&channel->esnap_channels);
    3620             : 
    3621         796 :         return 0;
    3622             : }
    3623             : 
    3624             : static void
    3625         796 : bs_channel_destroy(void *io_device, void *ctx_buf)
    3626             : {
    3627         796 :         struct spdk_bs_channel *channel = ctx_buf;
    3628             :         spdk_bs_user_op_t *op;
    3629             : 
    3630         796 :         while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
    3631           0 :                 op = TAILQ_FIRST(&channel->need_cluster_alloc);
    3632           0 :                 TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
    3633           0 :                 bs_user_op_abort(op, -EIO);
    3634             :         }
    3635             : 
    3636         796 :         while (!TAILQ_EMPTY(&channel->queued_io)) {
    3637           0 :                 op = TAILQ_FIRST(&channel->queued_io);
    3638           0 :                 TAILQ_REMOVE(&channel->queued_io, op, link);
    3639           0 :                 bs_user_op_abort(op, -EIO);
    3640             :         }
    3641             : 
    3642         796 :         blob_esnap_destroy_bs_channel(channel);
    3643             : 
    3644         796 :         free(channel->req_mem);
    3645         796 :         spdk_free(channel->new_cluster_page);
    3646         796 :         channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3647         796 : }
    3648             : 
    3649             : static void
    3650         780 : bs_dev_destroy(void *io_device)
    3651             : {
    3652         780 :         struct spdk_blob_store *bs = io_device;
    3653             :         struct spdk_blob        *blob, *blob_tmp;
    3654             : 
    3655         780 :         bs->dev->destroy(bs->dev);
    3656             : 
    3657         780 :         RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
    3658           0 :                 RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
    3659           0 :                 spdk_bit_array_clear(bs->open_blobids, blob->id);
    3660           0 :                 blob_free(blob);
    3661             :         }
    3662             : 
    3663         780 :         spdk_spin_destroy(&bs->used_lock);
    3664             : 
    3665         780 :         spdk_bit_array_free(&bs->open_blobids);
    3666         780 :         spdk_bit_array_free(&bs->used_blobids);
    3667         780 :         spdk_bit_array_free(&bs->used_md_pages);
    3668         780 :         spdk_bit_pool_free(&bs->used_clusters);
    3669             :         /*
    3670             :          * If this function is called for any reason except a successful unload,
    3671             :          * the unload_cpl type will be NONE and this will be a nop.
    3672             :          */
    3673         780 :         bs_call_cpl(&bs->unload_cpl, bs->unload_err);
    3674             : 
    3675         780 :         free(bs);
    3676         780 : }
    3677             : 
    3678             : static int
    3679         900 : bs_blob_list_add(struct spdk_blob *blob)
    3680             : {
    3681             :         spdk_blob_id snapshot_id;
    3682         900 :         struct spdk_blob_list *snapshot_entry = NULL;
    3683         900 :         struct spdk_blob_list *clone_entry = NULL;
    3684             : 
    3685         900 :         assert(blob != NULL);
    3686             : 
    3687         900 :         snapshot_id = blob->parent_id;
    3688         900 :         if (snapshot_id == SPDK_BLOBID_INVALID ||
    3689             :             snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    3690         488 :                 return 0;
    3691             :         }
    3692             : 
    3693         412 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
    3694         412 :         if (snapshot_entry == NULL) {
    3695             :                 /* Snapshot not found */
    3696         284 :                 snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
    3697         284 :                 if (snapshot_entry == NULL) {
    3698           0 :                         return -ENOMEM;
    3699             :                 }
    3700         284 :                 snapshot_entry->id = snapshot_id;
    3701         284 :                 TAILQ_INIT(&snapshot_entry->clones);
    3702         284 :                 TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
    3703             :         } else {
    3704         204 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    3705          76 :                         if (clone_entry->id == blob->id) {
    3706           0 :                                 break;
    3707             :                         }
    3708             :                 }
    3709             :         }
    3710             : 
    3711         412 :         if (clone_entry == NULL) {
    3712             :                 /* Clone not found */
    3713         412 :                 clone_entry = calloc(1, sizeof(struct spdk_blob_list));
    3714         412 :                 if (clone_entry == NULL) {
    3715           0 :                         return -ENOMEM;
    3716             :                 }
    3717         412 :                 clone_entry->id = blob->id;
    3718         412 :                 TAILQ_INIT(&clone_entry->clones);
    3719         412 :                 TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
    3720         412 :                 snapshot_entry->clone_count++;
    3721             :         }
    3722             : 
    3723         412 :         return 0;
    3724             : }
    3725             : 
    3726             : static void
    3727        1720 : bs_blob_list_remove(struct spdk_blob *blob)
    3728             : {
    3729        1720 :         struct spdk_blob_list *snapshot_entry = NULL;
    3730        1720 :         struct spdk_blob_list *clone_entry = NULL;
    3731             : 
    3732        1720 :         blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
    3733             : 
    3734        1720 :         if (snapshot_entry == NULL) {
    3735        1508 :                 return;
    3736             :         }
    3737             : 
    3738         212 :         blob->parent_id = SPDK_BLOBID_INVALID;
    3739         212 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3740         212 :         free(clone_entry);
    3741             : 
    3742         212 :         snapshot_entry->clone_count--;
    3743             : }
    3744             : 
    3745             : static int
    3746         780 : bs_blob_list_free(struct spdk_blob_store *bs)
    3747             : {
    3748             :         struct spdk_blob_list *snapshot_entry;
    3749             :         struct spdk_blob_list *snapshot_entry_tmp;
    3750             :         struct spdk_blob_list *clone_entry;
    3751             :         struct spdk_blob_list *clone_entry_tmp;
    3752             : 
    3753         924 :         TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
    3754         296 :                 TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
    3755         152 :                         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3756         152 :                         free(clone_entry);
    3757             :                 }
    3758         144 :                 TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
    3759         144 :                 free(snapshot_entry);
    3760             :         }
    3761             : 
    3762         780 :         return 0;
    3763             : }
    3764             : 
    3765             : static void
    3766         780 : bs_free(struct spdk_blob_store *bs)
    3767             : {
    3768         780 :         bs_blob_list_free(bs);
    3769             : 
    3770         780 :         bs_unregister_md_thread(bs);
    3771         780 :         spdk_io_device_unregister(bs, bs_dev_destroy);
    3772         780 : }
    3773             : 
    3774             : void
    3775        1048 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
    3776             : {
    3777             : 
    3778        1048 :         if (!opts) {
    3779           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
    3780           0 :                 return;
    3781             :         }
    3782             : 
    3783        1048 :         if (!opts_size) {
    3784           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    3785           0 :                 return;
    3786             :         }
    3787             : 
    3788        1048 :         memset(opts, 0, opts_size);
    3789        1048 :         opts->opts_size = opts_size;
    3790             : 
    3791             : #define FIELD_OK(field) \
    3792             :         offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
    3793             : 
    3794             : #define SET_FIELD(field, value) \
    3795             :         if (FIELD_OK(field)) { \
    3796             :                 opts->field = value; \
    3797             :         } \
    3798             : 
    3799        1048 :         SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
    3800        1048 :         SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3801        1048 :         SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3802        1048 :         SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
    3803        1048 :         SET_FIELD(clear_method,  BS_CLEAR_WITH_UNMAP);
    3804             : 
    3805        1048 :         if (FIELD_OK(bstype)) {
    3806        1048 :                 memset(&opts->bstype, 0, sizeof(opts->bstype));
    3807             :         }
    3808             : 
    3809        1048 :         SET_FIELD(iter_cb_fn, NULL);
    3810        1048 :         SET_FIELD(iter_cb_arg, NULL);
    3811        1048 :         SET_FIELD(force_recover, false);
    3812        1048 :         SET_FIELD(esnap_bs_dev_create, NULL);
    3813        1048 :         SET_FIELD(esnap_ctx, NULL);
    3814             : 
    3815             : #undef FIELD_OK
    3816             : #undef SET_FIELD
    3817             : }
    3818             : 
    3819             : static int
    3820         484 : bs_opts_verify(struct spdk_bs_opts *opts)
    3821             : {
    3822         484 :         if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
    3823         480 :             opts->max_channel_ops == 0) {
    3824           4 :                 SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
    3825           4 :                 return -1;
    3826             :         }
    3827             : 
    3828         480 :         return 0;
    3829             : }
    3830             : 
    3831             : /* START spdk_bs_load */
    3832             : 
    3833             : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
    3834             : 
    3835             : struct spdk_bs_load_ctx {
    3836             :         struct spdk_blob_store          *bs; /* Blobstore this context belongs to; set by bs_alloc() */
    3837             :         struct spdk_bs_super_block      *super; /* Super block buffer, spdk_zmalloc()'d (DMA) in bs_alloc() */
    3838             : 
    3839             :         struct spdk_bs_md_mask          *mask; /* Buffer for the mask currently being written, allocated by the bs_write_used_*() helpers */
    3840             :         bool                            in_page_chain;
    3841             :         uint32_t                        page_index;
    3842             :         uint32_t                        cur_page;
    3843             :         struct spdk_blob_md_page        *page;
    3844             : 
    3845             :         uint64_t                        num_extent_pages;
    3846             :         uint32_t                        *extent_page_num;
    3847             :         struct spdk_blob_md_page        *extent_pages;
    3848             :         struct spdk_bit_array           *used_clusters; /* Created in bs_alloc() with total_clusters bits; used until bs->used_clusters (bit pool) exists */
    3849             : 
    3850             :         spdk_bs_sequence_t                      *seq; /* Sequence completed via bs_sequence_finish() when the operation ends */
    3851             :         spdk_blob_op_with_handle_complete       iter_cb_fn; /* Copied from opts; called for each blob visited by bs_load_iter() */
    3852             :         void                                    *iter_cb_arg; /* First argument passed to iter_cb_fn */
    3853             :         struct spdk_blob                        *blob; /* Blob being examined for corruption (set in bs_load_iter()) */
    3854             :         spdk_blob_id                            blobid; /* Id of the corrupted blob, saved before the blob is closed and deleted */
    3855             : 
    3856             :         bool                                    force_recover; /* Copied from opts->force_recover in bs_alloc() */
    3857             : 
    3858             :         /* These fields are used in the spdk_bs_dump path. */
    3859             :         bool                                    dumping;
    3860             :         FILE                                    *fp;
    3861             :         spdk_bs_dump_print_xattr                print_xattr_fn;
    3862             :         char                                    xattr_name[4096];
    3863             : };
    3864             : 
    3865             : static int
    3866         784 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
    3867             :          struct spdk_bs_load_ctx **_ctx)
    3868             : {
    3869             :         struct spdk_blob_store  *bs;
    3870             :         struct spdk_bs_load_ctx *ctx;
    3871             :         uint64_t dev_size;
    3872             :         int rc;
    3873             : 
    3874         784 :         dev_size = dev->blocklen * dev->blockcnt;
    3875         784 :         if (dev_size < opts->cluster_sz) {
    3876             :                 /* Device size cannot be smaller than cluster size of blobstore */
    3877           0 :                 SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
    3878             :                              dev_size, opts->cluster_sz);
    3879           0 :                 return -ENOSPC;
    3880             :         }
    3881         784 :         if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) {
    3882             :                 /* Cluster size cannot be smaller than page size */
    3883           4 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
    3884             :                             opts->cluster_sz, SPDK_BS_PAGE_SIZE);
    3885           4 :                 return -EINVAL;
    3886             :         }
    3887         780 :         bs = calloc(1, sizeof(struct spdk_blob_store));
    3888         780 :         if (!bs) {
    3889           0 :                 return -ENOMEM;
    3890             :         }
    3891             : 
    3892         780 :         ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
    3893         780 :         if (!ctx) {
    3894           0 :                 free(bs);
    3895           0 :                 return -ENOMEM;
    3896             :         }
    3897             : 
    3898         780 :         ctx->bs = bs;
    3899         780 :         ctx->iter_cb_fn = opts->iter_cb_fn;
    3900         780 :         ctx->iter_cb_arg = opts->iter_cb_arg;
    3901         780 :         ctx->force_recover = opts->force_recover;
    3902             : 
    3903         780 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    3904             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    3905         780 :         if (!ctx->super) {
    3906           0 :                 free(ctx);
    3907           0 :                 free(bs);
    3908           0 :                 return -ENOMEM;
    3909             :         }
    3910             : 
    3911         780 :         RB_INIT(&bs->open_blobs);
    3912         780 :         TAILQ_INIT(&bs->snapshots);
    3913         780 :         bs->dev = dev;
    3914         780 :         bs->md_thread = spdk_get_thread();
    3915         780 :         assert(bs->md_thread != NULL);
    3916             : 
    3917             :         /*
    3918             :          * Do not use bs_lba_to_cluster() here since blockcnt may not be an
    3919             :          *  even multiple of the cluster size.
    3920             :          */
    3921         780 :         bs->cluster_sz = opts->cluster_sz;
    3922         780 :         bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
    3923         780 :         ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
    3924         780 :         if (!ctx->used_clusters) {
    3925           0 :                 spdk_free(ctx->super);
    3926           0 :                 free(ctx);
    3927           0 :                 free(bs);
    3928           0 :                 return -ENOMEM;
    3929             :         }
    3930             : 
    3931         780 :         bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    3932         780 :         if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
    3933         780 :                 bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
    3934             :         }
    3935         780 :         bs->num_free_clusters = bs->total_clusters;
    3936         780 :         bs->io_unit_size = dev->blocklen;
    3937             : 
    3938         780 :         bs->max_channel_ops = opts->max_channel_ops;
    3939         780 :         bs->super_blob = SPDK_BLOBID_INVALID;
    3940         780 :         memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
    3941         780 :         bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
    3942         780 :         bs->esnap_ctx = opts->esnap_ctx;
    3943             : 
    3944             :         /* The metadata is assumed to be at least 1 page */
    3945         780 :         bs->used_md_pages = spdk_bit_array_create(1);
    3946         780 :         bs->used_blobids = spdk_bit_array_create(0);
    3947         780 :         bs->open_blobids = spdk_bit_array_create(0);
    3948             : 
    3949         780 :         spdk_spin_init(&bs->used_lock);
    3950             : 
    3951         780 :         spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
    3952             :                                 sizeof(struct spdk_bs_channel), "blobstore");
    3953         780 :         rc = bs_register_md_thread(bs);
    3954         780 :         if (rc == -1) {
    3955           0 :                 spdk_io_device_unregister(bs, NULL);
    3956           0 :                 spdk_spin_destroy(&bs->used_lock);
    3957           0 :                 spdk_bit_array_free(&bs->open_blobids);
    3958           0 :                 spdk_bit_array_free(&bs->used_blobids);
    3959           0 :                 spdk_bit_array_free(&bs->used_md_pages);
    3960           0 :                 spdk_bit_array_free(&ctx->used_clusters);
    3961           0 :                 spdk_free(ctx->super);
    3962           0 :                 free(ctx);
    3963           0 :                 free(bs);
    3964             :                 /* FIXME: this is a lie but don't know how to get a proper error code here */
    3965           0 :                 return -ENOMEM;
    3966             :         }
    3967             : 
    3968         780 :         *_ctx = ctx;
    3969         780 :         *_bs = bs;
    3970         780 :         return 0;
    3971             : }
    3972             : 
    3973             : static void /* Abort a load/init operation: report bserrno and release the context and its blobstore. */
    3974          24 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
    3975             : {
    3976          24 :         assert(bserrno != 0); /* This is the failure path only; callers must pass a real error. */
    3977             : 
    3978          24 :         spdk_free(ctx->super);
    3979          24 :         bs_sequence_finish(ctx->seq, bserrno); /* Completes the sequence with the error code. */
    3980          24 :         bs_free(ctx->bs); /* Frees the snapshot list and unregisters the md thread and io_device. */
    3981          24 :         spdk_bit_array_free(&ctx->used_clusters);
    3982          24 :         free(ctx); /* ctx must not be used by the caller after this returns. NOTE(review): ctx->mask is not released here - confirm callers free it. */
    3983          24 : }
    3984             : 
    3985             : static void
    3986         824 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    3987             :                struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    3988             : {
    3989             :         /* Update the values in the super block */
    3990         824 :         super->super_blob = bs->super_blob;
    3991         824 :         memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
    3992         824 :         super->crc = blob_md_page_calc_crc(super);
    3993         824 :         bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
    3994         824 :                               bs_byte_to_lba(bs, sizeof(*super)),
    3995             :                               cb_fn, cb_arg);
    3996         824 : }
    3997             : 
    3998             : static void
    3999         760 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4000             : {
    4001         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4002             :         uint64_t        mask_size, lba, lba_count;
    4003             : 
    4004             :         /* Write out the used clusters mask */
    4005         760 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4006         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4007             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4008         760 :         if (!ctx->mask) {
    4009           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4010           0 :                 return;
    4011             :         }
    4012             : 
    4013         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
    4014         760 :         ctx->mask->length = ctx->bs->total_clusters;
    4015             :         /* We could get here through the normal unload path, or through dirty
    4016             :          * shutdown recovery.  For the normal unload path, we use the mask from
    4017             :          * the bit pool.  For dirty shutdown recovery, we don't have a bit pool yet -
    4018             :          * only the bit array from the load ctx.
    4019             :          */
    4020         760 :         if (ctx->bs->used_clusters) {
    4021         654 :                 assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
    4022         654 :                 spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
    4023             :         } else {
    4024         106 :                 assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
    4025         106 :                 spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
    4026             :         }
    4027         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4028         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4029         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4030             : }
    4031             : 
    4032             : static void
    4033         760 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4034             : {
    4035         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4036             :         uint64_t        mask_size, lba, lba_count;
    4037             : 
    4038         760 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4039         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4040             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4041         760 :         if (!ctx->mask) {
    4042           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4043           0 :                 return;
    4044             :         }
    4045             : 
    4046         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
    4047         760 :         ctx->mask->length = ctx->super->md_len;
    4048         760 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
    4049             : 
    4050         760 :         spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4051         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4052         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4053         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4054             : }
    4055             : 
    4056             : static void
    4057         760 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4058             : {
    4059         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4060             :         uint64_t        mask_size, lba, lba_count;
    4061             : 
    4062         760 :         if (ctx->super->used_blobid_mask_len == 0) {
    4063             :                 /*
    4064             :                  * This is a pre-v3 on-disk format where the blobid mask does not get
    4065             :                  *  written to disk.
    4066             :                  */
    4067          24 :                 cb_fn(seq, arg, 0);
    4068          24 :                 return;
    4069             :         }
    4070             : 
    4071         736 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4072         736 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4073             :                                  SPDK_MALLOC_DMA);
    4074         736 :         if (!ctx->mask) {
    4075           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4076           0 :                 return;
    4077             :         }
    4078             : 
    4079         736 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
    4080         736 :         ctx->mask->length = ctx->super->md_len;
    4081         736 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
    4082             : 
    4083         736 :         spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4084         736 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4085         736 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4086         736 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4087             : }
    4088             : 
    4089             : static void
    4090         696 : blob_set_thin_provision(struct spdk_blob *blob)
    4091             : {
    4092         696 :         blob_verify_md_op(blob);
    4093         696 :         blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
    4094         696 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4095         696 : }
    4096             : 
    4097             : static void
    4098        2086 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
    4099             : {
    4100        2086 :         blob_verify_md_op(blob);
    4101        2086 :         blob->clear_method = clear_method;
    4102        2086 :         blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
    4103        2086 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4104        2086 : }
    4105             : 
    4106             : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
    4107             : 
    4108             : static void
    4109          24 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
    4110             : {
    4111          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4112             :         spdk_blob_id id;
    4113             :         int64_t page_num;
    4114             : 
    4115             :         /* Iterate to next blob (we can't use spdk_bs_iter_next function as our
    4116             :          * last blob has been removed */
    4117          24 :         page_num = bs_blobid_to_page(ctx->blobid);
    4118          24 :         page_num++;
    4119          24 :         page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
    4120          24 :         if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
    4121          24 :                 bs_load_iter(ctx, NULL, -ENOENT);
    4122          24 :                 return;
    4123             :         }
    4124             : 
    4125           0 :         id = bs_page_to_blobid(page_num);
    4126             : 
    4127           0 :         spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
    4128             : }
    4129             : 
    4130             : static void
    4131          24 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
    4132             : {
    4133          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4134             : 
    4135          24 :         if (bserrno != 0) {
    4136           0 :                 SPDK_ERRLOG("Failed to close corrupted blob\n");
    4137           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4138           0 :                 return;
    4139             :         }
    4140             : 
    4141          24 :         spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
    4142             : }
    4143             : 
    4144             : static void
    4145          24 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
    4146             : {
    4147          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4148             :         uint64_t i;
    4149             : 
    4150          24 :         if (bserrno != 0) {
    4151           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4152           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4153           0 :                 return;
    4154             :         }
    4155             : 
    4156             :         /* Snapshot and clone have the same copy of cluster map and extent pages
    4157             :          * at this point. Let's clear both for snapshot now,
    4158             :          * so that it won't be cleared for clone later when we remove snapshot.
    4159             :          * Also set thin provision to pass data corruption check */
    4160         264 :         for (i = 0; i < ctx->blob->active.num_clusters; i++) {
    4161         240 :                 ctx->blob->active.clusters[i] = 0;
    4162             :         }
    4163          36 :         for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
    4164          12 :                 ctx->blob->active.extent_pages[i] = 0;
    4165             :         }
    4166             : 
    4167          24 :         ctx->blob->active.num_allocated_clusters = 0;
    4168             : 
    4169          24 :         ctx->blob->md_ro = false;
    4170             : 
    4171          24 :         blob_set_thin_provision(ctx->blob);
    4172             : 
    4173          24 :         ctx->blobid = ctx->blob->id;
    4174             : 
    4175          24 :         spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
    4176             : }
    4177             : 
    4178             : static void
    4179          12 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
    4180             : {
    4181          12 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4182             : 
    4183          12 :         if (bserrno != 0) {
    4184           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4185           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4186           0 :                 return;
    4187             :         }
    4188             : 
    4189          12 :         ctx->blob->md_ro = false;
    4190          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
    4191          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
    4192          12 :         spdk_blob_set_read_only(ctx->blob);
    4193             : 
    4194          12 :         if (ctx->iter_cb_fn) {
    4195           0 :                 ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
    4196             :         }
    4197          12 :         bs_blob_list_add(ctx->blob);
    4198             : 
    4199          12 :         spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4200             : }
    4201             : 
    4202             : static void
    4203          36 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
    4204             : {
    4205          36 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4206             : 
    4207          36 :         if (bserrno != 0) {
    4208           0 :                 SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
    4209           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4210           0 :                 return;
    4211             :         }
    4212             : 
    4213          36 :         if (blob->parent_id == ctx->blob->id) {
    4214             :                 /* Power failure occurred before updating clone (snapshot delete case)
    4215             :                  * or after updating clone (creating snapshot case) - keep snapshot */
    4216          12 :                 spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
    4217             :         } else {
    4218             :                 /* Power failure occurred after updating clone (snapshot delete case)
    4219             :                  * or before updating clone (creating snapshot case) - remove snapshot */
    4220          24 :                 spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
    4221             :         }
    4222             : }
    4223             : 
    /*
     * Blob iteration callback used while loading a blobstore.
     *
     * arg     - the struct spdk_bs_load_ctx driving this load.
     * blob    - the blob handed over by the iterator; only used when bserrno is 0.
     * bserrno - 0 when blob is valid, otherwise a negative errno.
     *
     * With bserrno == 0 the blob is checked for the SNAPSHOT_PENDING_REMOVAL and
     * SNAPSHOT_IN_PROGRESS internal xattrs. If neither lookup succeeds the blob is
     * reported to ctx->iter_cb_fn (when set), passed to bs_blob_list_add() and the
     * iteration continues. If either xattr is found, its value is used as a blob id
     * that is opened with bs_examine_clone as the completion callback.
     *
     * With bserrno != 0 the load is finished: -ENOENT is turned into success
     * (presumably it marks the end of the iteration - confirm), any other error is
     * logged and propagated. ctx->super, ctx->mask and ctx itself are released here.
     */
    4224             : static void
    4225         720 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
    4226             : {
    4227         720 :         struct spdk_bs_load_ctx *ctx = arg;
    4228         720 :         const void *value;
    4229         720 :         size_t len;
    4230         720 :         int rc = 0;
    4231             : 
    4232         720 :         if (bserrno == 0) {
    4233             :                 /* Examine blob if it is corrupted after power failure. Fix
    4234             :                  * the ones that can be fixed and remove any other corrupted
    4235             :                  * ones. If it is not corrupted just process it */
    4236         440 :                 rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
    4237         440 :                 if (rc != 0) {
    4238         420 :                         rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
    4239         420 :                         if (rc != 0) {
    4240             :                                 /* Not corrupted - process it and continue with iterating through blobs */
    4241         404 :                                 if (ctx->iter_cb_fn) {
    4242          34 :                                         ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
    4243             :                                 }
    4244         404 :                                 bs_blob_list_add(blob);
    4245         404 :                                 spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
    4246         404 :                                 return;
    4247             :                         }
    4248             : 
    4249             :                 }
    4250             : 
                                       /* Reached only when one of the two xattr lookups returned 0, so
                                        * value and len were filled in by blob_get_xattr_value().
                                        * NOTE(review): the length is only checked by an assert; builds
                                        * without asserts dereference value unchecked below. */
    4251          36 :                 assert(len == sizeof(spdk_blob_id));
    4252             : 
                                       /* Remember the blob under examination for the open callback. */
    4253          36 :                 ctx->blob = blob;
    4254             : 
    4255             :                 /* Open clone to check if we are able to fix this blob or should we remove it */
    4256          36 :                 spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
    4257          36 :                 return;
    4258         280 :         } else if (bserrno == -ENOENT) {
                               /* Reported to the sequence as success. */
    4259         280 :                 bserrno = 0;
    4260             :         } else {
    4261             :                 /*
    4262             :                  * This case needs to be looked at further.  Same problem
    4263             :                  *  exists with applications that rely on explicit blob
    4264             :                  *  iteration.  We should just skip the blob that failed
    4265             :                  *  to load and continue on to the next one.
    4266             :                  */
    4267           0 :                 SPDK_ERRLOG("Error in iterating blobs\n");
    4268             :         }
    4269             : 
                       /* Iteration is over: drop the user callback and tear down the load context. */
    4270         280 :         ctx->iter_cb_fn = NULL;
    4271             : 
    4272         280 :         spdk_free(ctx->super);
    4273         280 :         spdk_free(ctx->mask);
    4274         280 :         bs_sequence_finish(ctx->seq, bserrno);
    4275         280 :         free(ctx);
    4276             : }
    4277             : 
    4278             : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg); /* forward declaration; called by bs_load_complete() when ctx->dumping is set */
    4279             : 
    /*
     * Final step shared by the mask-loading path and the replay path.
     *
     * Builds bs->used_clusters as a bit pool from ctx->used_clusters, then either
     * hands over to bs_dump_read_md_page() when ctx->dumping is set, or starts
     * iterating over all blobs with bs_load_iter as the callback.
     */
    4280             : static void
    4281         280 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
    4282             : {
                       /* NOTE(review): the result is not checked for NULL here - confirm
                        * whether spdk_bit_pool_create_from_array() can fail. */
    4283         280 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    4284         280 :         if (ctx->dumping) {
    4285           0 :                 bs_dump_read_md_page(ctx->seq, ctx);
    4286           0 :                 return;
    4287             :         }
    4288         280 :         spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
    4289             : }
    4290             : 
    /*
     * Completion callback for the read of the used-blobids mask into ctx->mask.
     *
     * seq     - unused here.
     * cb_arg  - the struct spdk_bs_load_ctx.
     * bserrno - status of the read.
     *
     * Resizes bs->used_blobids to the mask length, loads the mask bits into it
     * and continues with bs_load_complete(). On the success path ctx->mask stays
     * allocated; bs_load_iter() frees it when the iteration finishes.
     *
     * NOTE(review): bserrno is never inspected in this function, unlike
     * bs_load_used_clusters_cpl() and bs_load_used_pages_cpl(), so a failed read
     * would be parsed as if it had succeeded - confirm whether that is intended.
     */
    4291             : static void
    4292         174 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4293             : {
    4294         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4295             :         int rc;
    4296             : 
    4297             :         /* The type must be correct */
    4298         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
    4299             : 
    4300             :         /* The length of the mask (in bits) must not be greater than
    4301             :          * the length of the buffer (converted to bits) */
    4302         174 :         assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
    4303             : 
    4304             :         /* The length of the mask must be exactly equal to the size
    4305             :          * (in pages) of the metadata region */
    4306         174 :         assert(ctx->mask->length == ctx->super->md_len);
    4307             : 
    4308         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
    4309         174 :         if (rc < 0) {
                               /* The mask is released here before bs_load_ctx_fail() is called. */
    4310           0 :                 spdk_free(ctx->mask);
    4311           0 :                 bs_load_ctx_fail(ctx, rc);
    4312           0 :                 return;
    4313             :         }
    4314             : 
    4315         174 :         spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4316         174 :         bs_load_complete(ctx);
    4317             : }
    4318             : 
    /*
     * Completion callback for the read of the used-clusters mask into ctx->mask.
     *
     * seq     - sequence used to issue the follow-up read.
     * cb_arg  - the struct spdk_bs_load_ctx.
     * bserrno - status of the read; non-zero fails the load.
     *
     * Loads the mask into ctx->used_clusters, derives bs->num_free_clusters from
     * the number of clear bits, frees the mask buffer and then allocates a new
     * buffer and reads the used-blobids mask, completing in
     * bs_load_used_blobids_cpl().
     */
    4319             : static void
    4320         174 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4321             : {
    4322         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4323             :         uint64_t                lba, lba_count, mask_size;
    4324             :         int                     rc;
    4325             : 
    4326         174 :         if (bserrno != 0) {
                               /* NOTE(review): ctx->mask is not freed on this path, while the
                                * resize-failure path below frees it before failing - possible
                                * leak, confirm against bs_load_ctx_fail(). */
    4327           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4328           0 :                 return;
    4329             :         }
    4330             : 
    4331             :         /* The type must be correct */
    4332         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    4333             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4334         174 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    4335             :                                              struct spdk_blob_md_page) * 8));
    4336             :         /*
    4337             :          * The length of the mask must be equal to or larger than the total number of clusters. It may be
    4338             :          * larger than the total number of clusters due to a failed spdk_bs_grow.
    4339             :          */
    4340         174 :         assert(ctx->mask->length >= ctx->bs->total_clusters);
    4341         174 :         if (ctx->mask->length > ctx->bs->total_clusters) {
                               /* NOTE(review): the message below has no trailing newline and
                                * misspells used_clusters. */
    4342           4 :                 SPDK_WARNLOG("Shrink the used_custers mask length to total_clusters");
    4343           4 :                 ctx->mask->length = ctx->bs->total_clusters;
    4344             :         }
    4345             : 
    4346         174 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
    4347         174 :         if (rc < 0) {
    4348           0 :                 spdk_free(ctx->mask);
    4349           0 :                 bs_load_ctx_fail(ctx, rc);
    4350           0 :                 return;
    4351             :         }
    4352             : 
    4353         174 :         spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
                       /* Every clear bit in the loaded mask counts as a free cluster. */
    4354         174 :         ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
    4355         174 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    4356             : 
    4357         174 :         spdk_free(ctx->mask);
    4358             : 
    4359             :         /* Read the used blobids mask */
    4360         174 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4361         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4362             :                                  SPDK_MALLOC_DMA);
    4363         174 :         if (!ctx->mask) {
    4364           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4365           0 :                 return;
    4366             :         }
    4367         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4368         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4369         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4370             :                              bs_load_used_blobids_cpl, ctx);
    4371             : }
    4372             : 
    /*
     * Completion callback for the read of the used-pages mask into ctx->mask.
     *
     * seq     - sequence used to issue the follow-up read.
     * cb_arg  - the struct spdk_bs_load_ctx.
     * bserrno - status of the read; non-zero fails the load.
     *
     * Loads the mask into bs->used_md_pages, frees the mask buffer and then
     * allocates a new buffer and reads the used-clusters mask, completing in
     * bs_load_used_clusters_cpl().
     */
    4373             : static void
    4374         174 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4375             : {
    4376         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4377             :         uint64_t                lba, lba_count, mask_size;
    4378             :         int                     rc;
    4379             : 
    4380         174 :         if (bserrno != 0) {
                               /* NOTE(review): ctx->mask is not freed on this path, while the
                                * resize-failure path below frees it before failing - possible
                                * leak, confirm against bs_load_ctx_fail(). */
    4381           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4382           0 :                 return;
    4383             :         }
    4384             : 
    4385             :         /* The type must be correct */
    4386         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
    4387             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4388         174 :         assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE *
    4389             :                                      8));
    4390             :         /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
    4391         174 :         if (ctx->mask->length != ctx->super->md_len) {
    4392           0 :                 SPDK_ERRLOG("mismatched md_len in used_pages mask: "
    4393             :                             "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
    4394             :                             ctx->mask->length, ctx->super->md_len);
                               /* NOTE(review): the mismatch is only enforced by this assert; when
                                * asserts are compiled out the load carries on with the mismatched
                                * length. */
    4395           0 :                 assert(false);
    4396             :         }
    4397             : 
    4398         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
    4399         174 :         if (rc < 0) {
    4400           0 :                 spdk_free(ctx->mask);
    4401           0 :                 bs_load_ctx_fail(ctx, rc);
    4402           0 :                 return;
    4403             :         }
    4404             : 
    4405         174 :         spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4406         174 :         spdk_free(ctx->mask);
    4407             : 
    4408             :         /* Read the used clusters mask */
    4409         174 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4410         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4411             :                                  SPDK_MALLOC_DMA);
    4412         174 :         if (!ctx->mask) {
    4413           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4414           0 :                 return;
    4415             :         }
    4416         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4417         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4418         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4419             :                              bs_load_used_clusters_cpl, ctx);
    4420             : }
    4421             : 
    /*
     * First step of loading the on-disk masks: allocates a buffer of
     * used_page_mask_len pages in ctx->mask and reads the used-pages mask into
     * it on ctx->seq. The read completes in bs_load_used_pages_cpl(); an
     * allocation failure fails the load with -ENOMEM.
     */
    4422             : static void
    4423         174 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
    4424             : {
    4425             :         uint64_t lba, lba_count, mask_size;
    4426             : 
    4427             :         /* Read the used pages mask */
    4428         174 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4429         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4430             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4431         174 :         if (!ctx->mask) {
    4432           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4433           0 :                 return;
    4434             :         }
    4435             : 
                       /* Mask start and length are stored in pages; convert both to LBAs. */
    4436         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4437         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4438         174 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    4439             :                              bs_load_used_pages_cpl, ctx);
    4440             : }
    4441             : 
    /*
     * Walks the descriptors of one metadata page during replay and records what
     * they reference in the load context.
     *
     * ctx  - load context; ctx->used_clusters, ctx->bs->num_free_clusters,
     *        ctx->extent_page_num and ctx->num_extent_pages are updated.
     * page - metadata page whose descriptors are parsed.
     *
     * Per descriptor type:
     *   PADDING       - a zero-length padding descriptor ends the page.
     *   EXTENT_RLE    - every cluster of every non-zero run is marked used.
     *   EXTENT_PAGE   - every non-zero cluster index is marked used.
     *   EXTENT_TABLE  - non-zero extent page indexes are appended to
     *                   ctx->extent_page_num.
     *   XATTR, XATTR_INTERNAL, FLAGS - skipped.
     *
     * Returns 0 on success, -EINVAL for an unknown or malformed descriptor,
     * -ENOSPC when a cluster is claimed while num_free_clusters is already 0,
     * and -ENOMEM when growing ctx->extent_page_num fails.
     *
     * NOTE(review): descriptor payloads are read before desc->length is checked
     * against the remaining space in page->descriptors - confirm that callers
     * only pass pages whose descriptors are known to fit.
     */
    4442             : static int
    4443         246 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
    4444             : {
    4445         246 :         struct spdk_blob_store *bs = ctx->bs;
    4446             :         struct spdk_blob_md_descriptor *desc;
    4447         246 :         size_t  cur_desc = 0;
    4448             : 
    4449         246 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4450         718 :         while (cur_desc < sizeof(page->descriptors)) {
    4451         718 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    4452         226 :                         if (desc->length == 0) {
    4453             :                                 /* If padding and length are 0, this terminates the page */
    4454         226 :                                 break;
    4455             :                         }
    4456         492 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    4457             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    4458             :                         unsigned int                            i, j;
    4459          68 :                         unsigned int                            cluster_count = 0;
    4460             :                         uint32_t                                cluster_idx;
    4461             : 
    4462          68 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    4463             : 
                                       /* i walks the runs in the descriptor, j the clusters within a run. */
    4464         136 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    4465         828 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
    4466         760 :                                         cluster_idx = desc_extent_rle->extents[i].cluster_idx;
    4467             :                                         /*
    4468             :                                          * cluster_idx = 0 means an unallocated cluster - don't mark that
    4469             :                                          * in the used cluster map.
    4470             :                                          */
    4471         760 :                                         if (cluster_idx != 0) {
    4472         540 :                                                 SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
                                                               /* NOTE(review): the result of spdk_bit_array_set() is not
                                                                * used, and the free-cluster check happens after the bit
                                                                * has already been set. */
    4473         540 :                                                 spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
    4474         540 :                                                 if (bs->num_free_clusters == 0) {
    4475           0 :                                                         return -ENOSPC;
    4476             :                                                 }
    4477         540 :                                                 bs->num_free_clusters--;
    4478             :                                         }
    4479         760 :                                         cluster_count++;
    4480             :                                 }
    4481             :                         }
                                       /* A descriptor that describes no clusters at all is rejected. */
    4482          68 :                         if (cluster_count == 0) {
    4483           0 :                                 return -EINVAL;
    4484             :                         }
    4485         424 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4486             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    4487             :                         uint32_t                                        i;
    4488          52 :                         uint32_t                                        cluster_count = 0;
    4489             :                         uint32_t                                        cluster_idx;
    4490             :                         size_t                                          cluster_idx_length;
    4491             : 
    4492          52 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
                                       /* Computed before the length is validated; the check below rejects
                                        * lengths not larger than sizeof(start_cluster_idx), so a wrapped
                                        * value is never used. */
    4493          52 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
    4494             : 
    4495          52 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
    4496          52 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
    4497           0 :                                 return -EINVAL;
    4498             :                         }
    4499             : 
    4500         652 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
    4501         600 :                                 cluster_idx = desc_extent->cluster_idx[i];
    4502             :                                 /*
    4503             :                                  * cluster_idx = 0 means an unallocated cluster - don't mark that
    4504             :                                  * in the used cluster map.
    4505             :                                  */
    4506         600 :                                 if (cluster_idx != 0) {
                                                       /* NOTE(review): with && both comparisons can apparently only
                                                        * hold together if start_cluster_idx + cluster_count wraps, so
                                                        * this check looks unsatisfiable in practice - confirm the
                                                        * intended range check. */
    4507         600 :                                         if (cluster_idx < desc_extent->start_cluster_idx &&
    4508           0 :                                             cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
    4509           0 :                                                 return -EINVAL;
    4510             :                                         }
                                                       /* NOTE(review): the result of spdk_bit_array_set() is not used,
                                                        * and the free-cluster check happens after the bit has already
                                                        * been set. */
    4511         600 :                                         spdk_bit_array_set(ctx->used_clusters, cluster_idx);
    4512         600 :                                         if (bs->num_free_clusters == 0) {
    4513           0 :                                                 return -ENOSPC;
    4514             :                                         }
    4515         600 :                                         bs->num_free_clusters--;
    4516             :                                 }
    4517         600 :                                 cluster_count++;
    4518             :                         }
    4519             : 
    4520          52 :                         if (cluster_count == 0) {
    4521           0 :                                 return -EINVAL;
    4522             :                         }
    4523         372 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    4524             :                         /* Skip this item */
    4525         296 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    4526             :                         /* Skip this item */
    4527         236 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    4528             :                         /* Skip this item */
    4529          82 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    4530             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
                                       /* Starts from the count already collected in ctx, so the realloc
                                        * below is sized for old plus new entries. */
    4531          82 :                         uint32_t num_extent_pages = ctx->num_extent_pages;
    4532             :                         uint32_t i;
    4533             :                         size_t extent_pages_length;
    4534             :                         void *tmp;
    4535             : 
    4536          82 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
                                       /* NOTE(review): only length == 0 is rejected below; a non-zero
                                        * length smaller than sizeof(num_clusters) would presumably make
                                        * this subtraction wrap - confirm. */
    4537          82 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
    4538             : 
    4539          82 :                         if (desc_extent_table->length == 0 ||
    4540          82 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
    4541           0 :                                 return -EINVAL;
    4542             :                         }
    4543             : 
                                       /* First pass: validate the entries and count the allocated extent pages. */
    4544         160 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4545          78 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
    4546          52 :                                         if (desc_extent_table->extent_page[i].num_pages != 1) {
    4547           0 :                                                 return -EINVAL;
    4548             :                                         }
    4549          52 :                                         num_extent_pages += 1;
    4550             :                                 }
    4551             :                         }
    4552             : 
    4553          82 :                         if (num_extent_pages > 0) {
                                               /* The old array pointer is kept in ctx until realloc has
                                                * succeeded, so a failure leaves ctx->extent_page_num intact. */
    4554          52 :                                 tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
    4555          52 :                                 if (tmp == NULL) {
    4556           0 :                                         return -ENOMEM;
    4557             :                                 }
    4558          52 :                                 ctx->extent_page_num = tmp;
    4559             : 
    4560             :                                 /* Extent table entries contain md page numbers for extent pages.
    4561             :                                  * Zeroes represent unallocated extent pages, those are run-length-encoded.
    4562             :                                  */
    4563         104 :                                 for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4564          52 :                                         if (desc_extent_table->extent_page[i].page_idx != 0) {
    4565          52 :                                                 ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
    4566          52 :                                                 ctx->num_extent_pages += 1;
    4567             :                                         }
    4568             :                                 }
    4569             :                         }
    4570             :                 } else {
    4571             :                         /* Error */
    4572           0 :                         return -EINVAL;
    4573             :                 }
    4574             :                 /* Advance to the next descriptor */
    4575         492 :                 cur_desc += sizeof(*desc) + desc->length;
                               /* Stop when a full descriptor header no longer fits in the page. */
    4576         492 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    4577          20 :                         break;
    4578             :                 }
    4579         472 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    4580             :         }
    4581         246 :         return 0;
    4582             : }
    4583             : 
    /*
     * Checks whether a metadata page looks like a valid extent page.
     *
     * Returns true only if all of the following hold:
     *   - the stored CRC matches blob_md_page_calc_crc(page),
     *   - page->sequence_num is 0,
     *   - the first descriptor is of type EXTENT_PAGE,
     *   - that descriptor (header plus payload) fits in page->descriptors,
     *   - if another descriptor header fits after it, its length field is 0.
     * Otherwise returns false.
     */
    4584             : static bool
    4585        1296 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
    4586             : {
    4587             :         uint32_t crc;
    4588        1296 :         struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4589             :         size_t desc_len;
    4590             : 
    4591        1296 :         crc = blob_md_page_calc_crc(page);
    4592        1296 :         if (crc != page->crc) {
    4593           0 :                 return false;
    4594             :         }
    4595             : 
    4596             :         /* Extent page should always be of sequence num 0. */
    4597        1296 :         if (page->sequence_num != 0) {
    4598          44 :                 return false;
    4599             :         }
    4600             : 
    4601             :         /* Descriptor type must be EXTENT_PAGE. */
    4602        1252 :         if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4603         154 :                 return false;
    4604             :         }
    4605             : 
    4606             :         /* Descriptor length cannot exceed the page. */
    4607        1098 :         desc_len = sizeof(*desc) + desc->length;
    4608        1098 :         if (desc_len > sizeof(page->descriptors)) {
    4609           0 :                 return false;
    4610             :         }
    4611             : 
    4612             :         /* It has to be the only descriptor in the page. */
                       /* Only the length of the following descriptor header is inspected,
                        * not its type. */
    4613        1098 :         if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
    4614        1098 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
    4615        1098 :                 if (desc->length != 0) {
    4616           0 :                         return false;
    4617             :                 }
    4618             :         }
    4619             : 
    4620        1098 :         return true;
    4621             : }
    4622             : 
    /*
     * Checks whether ctx->page, read from md page ctx->cur_page, is a valid blob
     * metadata page.
     *
     * Returns false when the stored CRC does not match, or when the page claims
     * sequence number 0 but its id differs from the blob id derived from
     * ctx->cur_page. Returns true otherwise.
     */
    4623             : static bool
    4624        6754 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
    4625             : {
    4626             :         uint32_t crc;
    4627        6754 :         struct spdk_blob_md_page *page = ctx->page;
    4628             : 
    4629        6754 :         crc = blob_md_page_calc_crc(page);
    4630        6754 :         if (crc != page->crc) {
    4631        6538 :                 return false;
    4632             :         }
    4633             : 
    4634             :         /* First page of a sequence should match the blobid. */
    4635         216 :         if (page->sequence_num == 0 &&
    4636         172 :             bs_page_to_blobid(ctx->cur_page) != page->id) {
    4637          18 :                 return false;
    4638             :         }
                       /* Sanity check, evaluated only when asserts are enabled: a page accepted
                        * here must not also pass the extent page validation. */
    4639         198 :         assert(bs_load_cur_extent_page_valid(page) == false);
    4640             : 
    4641         198 :         return true;
    4642             : }
    4643             : 
    4644             : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx); /* forward declaration; called by bs_load_replay_md_chain_cpl() */
    4645             : 
    /*
     * Completion callback for bs_write_used_clusters(), the last of the three
     * mask writes issued after replay. Fails the load on error, otherwise
     * continues with bs_load_complete().
     *
     * seq     - unused here.
     * cb_arg  - the struct spdk_bs_load_ctx.
     * bserrno - status of the write.
     */
    4646             : static void
    4647         106 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4648             : {
    4649         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4650             : 
    4651         106 :         if (bserrno != 0) {
    4652           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4653           0 :                 return;
    4654             :         }
    4655             : 
    4656         106 :         bs_load_complete(ctx);
    4657             : }
    4658             : 
    /*
     * Completion callback for bs_write_used_blobids(). Releases ctx->mask, then
     * fails the load on error or continues with bs_write_used_clusters().
     *
     * seq     - sequence used for the next write.
     * cb_arg  - the struct spdk_bs_load_ctx.
     * bserrno - status of the write.
     */
    4659             : static void
    4660         106 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4661             : {
    4662         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4663             : 
                       /* Freed and cleared before the error check, so both outcomes leave
                        * ctx->mask as NULL. */
    4664         106 :         spdk_free(ctx->mask);
    4665         106 :         ctx->mask = NULL;
    4666             : 
    4667         106 :         if (bserrno != 0) {
    4668           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4669           0 :                 return;
    4670             :         }
    4671             : 
    4672         106 :         bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
    4673             : }
    4674             : 
    /*
     * Completion callback for bs_write_used_md(). Releases ctx->mask, then fails
     * the load on error or continues with bs_write_used_blobids().
     *
     * seq     - sequence used for the next write.
     * cb_arg  - the struct spdk_bs_load_ctx.
     * bserrno - status of the write.
     */
    4675             : static void
    4676         106 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4677             : {
    4678         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4679             : 
                       /* Freed and cleared before the error check, so both outcomes leave
                        * ctx->mask as NULL. */
    4680         106 :         spdk_free(ctx->mask);
    4681         106 :         ctx->mask = NULL;
    4682             : 
    4683         106 :         if (bserrno != 0) {
    4684           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4685           0 :                 return;
    4686             :         }
    4687             : 
    4688         106 :         bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
    4689             : }
    4690             : 
    /*
     * Starts writing the rebuilt masks back after replay: issues
     * bs_write_used_md() on ctx->seq. The completion chain continues through
     * bs_load_write_used_pages_cpl(), bs_load_write_used_blobids_cpl() and
     * bs_load_write_used_clusters_cpl().
     */
    4691             : static void
    4692         106 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
    4693             : {
    4694         106 :         bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
    4695         106 : }
    4696             : 
    /*
     * Called when replay of one metadata page chain is finished.
     *
     * Advances ctx->page_index to the next md page that is not yet marked in
     * bs->used_md_pages. If that index is still below super->md_len, replay
     * continues from that page. Otherwise replay is done: the clusters that hold
     * the metadata region are marked used, ctx->page is freed and the rebuilt
     * masks are written out via bs_load_write_used_md().
     */
    4697             : static void
    4698        6714 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
    4699             : {
    4700             :         uint64_t num_md_clusters;
    4701             :         uint64_t i;
    4702             : 
    4703        6714 :         ctx->in_page_chain = false;
    4704             : 
                       /* Skip pages already marked as used.
                        * NOTE(review): termination relies on spdk_bit_array_get() returning
                        * false once page_index passes the end of the array - confirm. */
    4705             :         do {
    4706        6784 :                 ctx->page_index++;
    4707        6784 :         } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
    4708             : 
    4709        6714 :         if (ctx->page_index < ctx->super->md_len) {
    4710        6608 :                 ctx->cur_page = ctx->page_index;
    4711        6608 :                 bs_load_replay_cur_md_page(ctx);
    4712             :         } else {
    4713             :                 /* Claim all of the clusters used by the metadata */
    4714         106 :                 num_md_clusters = spdk_divide_round_up(
    4715         106 :                                           ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
    4716         480 :                 for (i = 0; i < num_md_clusters; i++) {
    4717         374 :                         spdk_bit_array_set(ctx->used_clusters, i);
    4718             :                 }
                               /* The free count is reduced by the whole metadata span at once, without
                                * checking it against the current value. */
    4719         106 :                 ctx->bs->num_free_clusters -= num_md_clusters;
    4720         106 :                 spdk_free(ctx->page);
    4721         106 :                 bs_load_write_used_md(ctx);
    4722             :         }
    4723        6714 : }
    4724             : 
    /*
     * Completion callback for the batch read of the extent pages collected in
     * ctx->extent_page_num into ctx->extent_pages.
     *
     * seq     - unused here.
     * cb_arg  - the struct spdk_bs_load_ctx.
     * bserrno - status of the batch; non-zero fails the load.
     *
     * Each page is validated, its md page number is marked in bs->used_md_pages
     * and its descriptors are replayed with bs_load_replay_md_parse_page(). Any
     * invalid page or parse error fails the load with -EILSEQ (the parser's own
     * return code is not propagated). On success both arrays are released, the
     * counters are reset and bs_load_replay_md_chain_cpl() is called.
     *
     * NOTE(review): the error paths free ctx->extent_pages but not
     * ctx->extent_page_num - confirm that bs_load_ctx_fail() releases it.
     */
    4725             : static void
    4726          52 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4727             : {
    4728          52 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4729             :         uint32_t page_num;
    4730             :         uint64_t i;
    4731             : 
    4732          52 :         if (bserrno != 0) {
    4733           0 :                 spdk_free(ctx->extent_pages);
    4734           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4735           0 :                 return;
    4736             :         }
    4737             : 
    4738         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4739             :                 /* Extent pages are only read when referenced from the md page chain.
    4740             :                  * The md is not consistent if such a page is not a valid extent page. */
    4741          52 :                 if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
    4742           0 :                         spdk_free(ctx->extent_pages);
    4743           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4744           0 :                         return;
    4745             :                 }
    4746             : 
                               /* extent_pages[i] holds the contents of md page extent_page_num[i]. */
    4747          52 :                 page_num = ctx->extent_page_num[i];
    4748          52 :                 spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
    4749          52 :                 if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
    4750           0 :                         spdk_free(ctx->extent_pages);
    4751           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4752           0 :                         return;
    4753             :                 }
    4754             :         }
    4755             : 
                       /* Reset the extent page bookkeeping so the next chain starts empty. */
    4756          52 :         spdk_free(ctx->extent_pages);
    4757          52 :         free(ctx->extent_page_num);
    4758          52 :         ctx->extent_page_num = NULL;
    4759          52 :         ctx->num_extent_pages = 0;
    4760             : 
    4761          52 :         bs_load_replay_md_chain_cpl(ctx);
    4762             : }
    4763             : /* Read every extent page recorded in ctx->extent_page_num with one batch.
                        *
                        * Allocates a zeroed buffer of SPDK_BS_PAGE_SIZE * ctx->num_extent_pages bytes
                        * (spdk_zmalloc with SPDK_MALLOC_DMA); allocation failure fails the load with
                        * -ENOMEM. ctx->seq is turned into a batch whose completion is
                        * bs_load_replay_extent_page_cpl(), one page-sized read is queued per entry
                        * into ctx->extent_pages[i], and the batch is closed.
                        *
                        * NOTE(review): the page number is range-checked against super->md_len only
                        * by assert(); confirm it is validated before this point for builds where
                        * assert() is compiled out.
                        */
    4764             : static void
    4765          52 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
    4766             : {
    4767             :         spdk_bs_batch_t *batch;
    4768             :         uint32_t page;
    4769             :         uint64_t lba;
    4770             :         uint64_t i;
    4771             : 
    4772          52 :         ctx->extent_pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE * ctx->num_extent_pages, 0,
    4773             :                                          NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4774          52 :         if (!ctx->extent_pages) {
    4775           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4776           0 :                 return;
    4777             :         }
    4778             : 
    4779          52 :         batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
    4780             : 
    4781         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4782          52 :                 page = ctx->extent_page_num[i];
    4783          52 :                 assert(page < ctx->super->md_len);
    4784          52 :                 lba = bs_md_page_to_lba(ctx->bs, page);
    4785          52 :                 bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
    4786          52 :                                   bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE));
    4787             :         }
    4788             : 
    4789          52 :         bs_batch_close(batch);
    4790             : }
    4791             : /* Completion callback for the md page read issued by
                        * bs_load_replay_cur_md_page(); cb_arg is the struct spdk_bs_load_ctx.
                        *
                        * A non-zero bserrno fails the load. The page in ctx->page is processed only
                        * if bs_load_cur_md_page_valid() returns true and it either has
                        * sequence_num == 0 or the context is already following a page chain:
                        *   - the page is claimed with bs_claim_md_page() under bs->used_lock;
                        *   - for sequence_num == 0 the page number is logged and set in
                        *     bs->used_blobids;
                        *   - bs_load_replay_md_parse_page() is run; a non-zero result fails the
                        *     load with -EILSEQ;
                        *   - if page->next is not SPDK_INVALID_MD_PAGE, in_page_chain is set and
                        *     that page is read next;
                        *   - otherwise, if ctx->num_extent_pages is non-zero, the extent pages are
                        *     read via bs_load_replay_extent_pages().
                        * Every path that does not return early ends in
                        * bs_load_replay_md_chain_cpl().
                        *
                        * seq is not used in the body.
                        */
    4792             : static void
    4793        6754 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4794             : {
    4795        6754 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4796             :         uint32_t page_num;
    4797             :         struct spdk_blob_md_page *page;
    4798             : 
    4799        6754 :         if (bserrno != 0) {
    4800           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4801           0 :                 return;
    4802             :         }
    4803             : 
    4804        6754 :         page_num = ctx->cur_page;
    4805        6754 :         page = ctx->page;
    4806        6754 :         if (bs_load_cur_md_page_valid(ctx) == true) {
    4807         198 :                 if (page->sequence_num == 0 || ctx->in_page_chain == true) {
    4808         194 :                         spdk_spin_lock(&ctx->bs->used_lock);
    4809         194 :                         bs_claim_md_page(ctx->bs, page_num);
    4810         194 :                         spdk_spin_unlock(&ctx->bs->used_lock);
    4811         194 :                         if (page->sequence_num == 0) {
    4812         154 :                                 SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
    4813         154 :                                 spdk_bit_array_set(ctx->bs->used_blobids, page_num);
    4814             :                         }
    4815         194 :                         if (bs_load_replay_md_parse_page(ctx, page)) {
    4816           0 :                                 bs_load_ctx_fail(ctx, -EILSEQ);
    4817           0 :                                 return;
    4818             :                         }
    4819         194 :                         if (page->next != SPDK_INVALID_MD_PAGE) {
    4820          40 :                                 ctx->in_page_chain = true;
    4821          40 :                                 ctx->cur_page = page->next;
    4822          40 :                                 bs_load_replay_cur_md_page(ctx);
    4823          40 :                                 return;
    4824             :                         }
    4825         154 :                         if (ctx->num_extent_pages != 0) {
    4826          52 :                                 bs_load_replay_extent_pages(ctx);
    4827          52 :                                 return;
    4828             :                         }
    4829             :                 }
    4830             :         }
    4831        6662 :         bs_load_replay_md_chain_cpl(ctx);
    4832             : }
    4833             : /* Issue a read of md page ctx->cur_page into ctx->page on ctx->seq, with
                        * bs_load_replay_md_cpl() as the completion callback. The read length is
                        * SPDK_BS_PAGE_SIZE bytes converted with bs_byte_to_lba(). cur_page is
                        * checked against super->md_len by assert() only.
                        */
    4834             : static void
    4835        6754 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
    4836             : {
    4837             :         uint64_t lba;
    4838             : 
    4839        6754 :         assert(ctx->cur_page < ctx->super->md_len);
    4840        6754 :         lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
    4841        6754 :         bs_sequence_read_dev(ctx->seq, ctx->page, lba,
    4842        6754 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    4843             :                              bs_load_replay_md_cpl, ctx);
    4844        6754 : }
    4845             : /* Start the metadata replay: reset ctx->page_index and ctx->cur_page to 0,
                        * allocate the single page buffer ctx->page (zeroed, SPDK_MALLOC_DMA) and
                        * read the first md page. Allocation failure fails the load with -ENOMEM.
                        */
    4846             : static void
    4847         106 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
    4848             : {
    4849         106 :         ctx->page_index = 0;
    4850         106 :         ctx->cur_page = 0;
    4851         106 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    4852             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4853         106 :         if (!ctx->page) {
    4854           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4855           0 :                 return;
    4856             :         }
    4857         106 :         bs_load_replay_cur_md_page(ctx);
    4858             : }
    4859             : /* Begin recovery of the blobstore.
                        *
                        * Logs a notice, resizes bs->used_md_pages, bs->used_blobids and
                        * bs->open_blobids to super->md_len and ctx->used_clusters to
                        * bs->total_clusters, sets bs->num_free_clusters to bs->total_clusters and
                        * starts bs_load_replay_md(). Any negative spdk_bit_array_resize() result
                        * fails the load with -ENOMEM (the original rc is not propagated).
                        */
    4860             : static void
    4861         106 : bs_recover(struct spdk_bs_load_ctx *ctx)
    4862             : {
    4863             :         int             rc;
    4864             : 
    4865         106 :         SPDK_NOTICELOG("Performing recovery on blobstore\n");
    4866         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
    4867         106 :         if (rc < 0) {
    4868           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4869           0 :                 return;
    4870             :         }
    4871             : 
    4872         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
    4873         106 :         if (rc < 0) {
    4874           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4875           0 :                 return;
    4876             :         }
    4877             : 
    4878         106 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4879         106 :         if (rc < 0) {
    4880           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4881           0 :                 return;
    4882             :         }
    4883             : 
    4884         106 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
    4885         106 :         if (rc < 0) {
    4886           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4887           0 :                 return;
    4888             :         }
    4889             :         /* Every cluster starts out counted as free before the replay begins. */
    4890         106 :         ctx->bs->num_free_clusters = ctx->bs->total_clusters;
    4891         106 :         bs_load_replay_md(ctx);
    4892             : }
    4893             : /* Populate ctx->bs from the super block in ctx->super.
                        *
                        * A zero super->size is replaced by dev->blockcnt * dev->blocklen and a zero
                        * super->io_unit_size by SPDK_BS_PAGE_SIZE; both defaults are written back
                        * into ctx->super. bs->clean is set to 1. pages_per_cluster_shift is
                        * assigned only when pages_per_cluster is a power of two. ctx->used_clusters
                        * and bs->open_blobids are resized to total_clusters and md_len
                        * respectively.
                        *
                        * Returns 0 on success, -ENOMEM if either bit array resize fails.
                        *
                        * NOTE(review): total_clusters divides by super->cluster_size and
                        * total_data_clusters divides by pages_per_cluster; presumably
                        * bs_super_validate() rejects values that would make either zero before
                        * this runs - confirm.
                        */
    4894             : static int
    4895         276 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
    4896             : {
    4897             :         int rc;
    4898             : 
    4899         276 :         if (ctx->super->size == 0) {
    4900           8 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    4901             :         }
    4902             : 
    4903         276 :         if (ctx->super->io_unit_size == 0) {
    4904           8 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    4905             :         }
    4906             : 
    4907         276 :         ctx->bs->clean = 1;
    4908         276 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    4909         276 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    4910         276 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    4911         276 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    4912         276 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    4913             :         }
    4914         276 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    4915         276 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4916         276 :         if (rc < 0) {
    4917           0 :                 return -ENOMEM;
    4918             :         }
    4919         276 :         ctx->bs->md_start = ctx->super->md_start;
    4920         276 :         ctx->bs->md_len = ctx->super->md_len;
    4921         276 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    4922         276 :         if (rc < 0) {
    4923           0 :                 return -ENOMEM;
    4924             :         }
    4925             :         /* Data clusters = all clusters minus those covering [0, md_start + md_len) pages. */
    4926         552 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    4927         276 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    4928         276 :         ctx->bs->super_blob = ctx->super->super_blob;
    4929         276 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    4930             : 
    4931         276 :         return 0;
    4932             : }
    4933             : /* Completion callback for the super block read started by spdk_bs_load();
                        * cb_arg is the struct spdk_bs_load_ctx.
                        *
                        * Validates the super block with bs_super_validate() and parses it with
                        * bs_parse_super(); either failure fails the load with the returned code.
                        * Recovery (bs_recover()) is chosen when super->used_blobid_mask_len is 0,
                        * super->clean is 0, or ctx->force_recover is set; otherwise loading
                        * continues with bs_load_read_used_pages().
                        *
                        * NOTE(review): bserrno and seq are not examined, so a failed read is only
                        * caught if bs_super_validate() rejects the buffer contents - confirm that
                        * is intended.
                        */
    4934             : static void
    4935         300 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4936             : {
    4937         300 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4938             :         int rc;
    4939             : 
    4940         300 :         rc = bs_super_validate(ctx->super, ctx->bs);
    4941         300 :         if (rc != 0) {
    4942          24 :                 bs_load_ctx_fail(ctx, rc);
    4943          24 :                 return;
    4944             :         }
    4945             : 
    4946         276 :         rc = bs_parse_super(ctx);
    4947         276 :         if (rc < 0) {
    4948           0 :                 bs_load_ctx_fail(ctx, rc);
    4949           0 :                 return;
    4950             :         }
    4951             : 
    4952         276 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
    4953         106 :                 bs_recover(ctx);
    4954             :         } else {
    4955         170 :                 bs_load_read_used_pages(ctx);
    4956             :         }
    4957             : }
    4958             : /* Copy caller-supplied options from src into dst, field by field.
                        *
                        * A field is copied only if it lies entirely within the first
                        * src->opts_size bytes of struct spdk_bs_opts (FIELD_OK); other fields of
                        * dst are left untouched. bstype is copied with memcpy(), the rest by
                        * assignment. dst->opts_size is set to src->opts_size.
                        *
                        * Returns 0 on success, -1 (after logging) when src->opts_size is 0.
                        */
    4959             : static inline int
    4960         308 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
    4961             : {
    4962             : 
    4963         308 :         if (!src->opts_size) {
    4964           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    4965           0 :                 return -1;
    4966             :         }
    4967             : 
    4968             : #define FIELD_OK(field) \
    4969             :         offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
    4970             : 
    4971             : #define SET_FIELD(field) \
    4972             :         if (FIELD_OK(field)) { \
    4973             :                 dst->field = src->field; \
    4974             :         } \
    4975             : 
    4976         308 :         SET_FIELD(cluster_sz);
    4977         308 :         SET_FIELD(num_md_pages);
    4978         308 :         SET_FIELD(max_md_ops);
    4979         308 :         SET_FIELD(max_channel_ops);
    4980         308 :         SET_FIELD(clear_method);
    4981             : 
    4982         308 :         if (FIELD_OK(bstype)) {
    4983         308 :                 memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
    4984             :         }
    4985         308 :         SET_FIELD(iter_cb_fn);
    4986         308 :         SET_FIELD(iter_cb_arg);
    4987         308 :         SET_FIELD(force_recover);
    4988         308 :         SET_FIELD(esnap_bs_dev_create);
    4989         308 :         SET_FIELD(esnap_ctx);
    4990             : 
    4991         308 :         dst->opts_size = src->opts_size;
    4992             : 
    4993             :         /* You should not remove this statement, but need to update the assert statement
    4994             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    4995             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
    4996             : 
    4997             : #undef FIELD_OK
    4998             : #undef SET_FIELD
    4999             : 
    5000         308 :         return 0;
    5001             : }
    5002             : /* Load a blobstore from dev and report the result through cb_fn.
                        *
                        * Steps visible here:
                        *   1. Reject devices whose block length does not divide SPDK_BS_PAGE_SIZE
                        *      (-EINVAL).
                        *   2. Initialize opts with spdk_bs_opts_init() and overlay the caller's
                        *      options o, if any, with bs_opts_copy().
                        *   3. Reject max_md_ops == 0 or max_channel_ops == 0 (-EINVAL).
                        *   4. Allocate the blobstore and load context with bs_alloc().
                        *   5. Start a sequence on bs->md_channel and read the super block;
                        *      bs_load_super_cpl() continues from there.
                        * The failures in steps 1, 3 and 4 call dev->destroy(dev) and then
                        * cb_fn(cb_arg, NULL, err). A failed sequence start frees ctx->super, ctx
                        * and bs and calls cb_fn with -ENOMEM.
                        *
                        * NOTE(review): when bs_opts_copy() fails the function returns without
                        * calling dev->destroy() or cb_fn, unlike every other early exit, so the
                        * caller is never notified. The listing shows zero hits on that branch.
                        */
    5003             : void
    5004         312 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5005             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5006             : {
    5007         312 :         struct spdk_blob_store  *bs;
    5008         312 :         struct spdk_bs_cpl      cpl;
    5009         312 :         struct spdk_bs_load_ctx *ctx;
    5010         312 :         struct spdk_bs_opts     opts = {};
    5011             :         int err;
    5012             : 
    5013         312 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    5014             : 
    5015         312 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5016           4 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    5017           4 :                 dev->destroy(dev);
    5018           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5019           4 :                 return;
    5020             :         }
    5021             : 
    5022         308 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5023         308 :         if (o) {
    5024         122 :                 if (bs_opts_copy(o, &opts)) {
    5025           0 :                         return; /* NOTE(review): no dev->destroy() / cb_fn on this path. */
    5026             :                 }
    5027             :         }
    5028             : 
    5029         308 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    5030           8 :                 dev->destroy(dev);
    5031           8 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5032           8 :                 return;
    5033             :         }
    5034             : 
    5035         300 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5036         300 :         if (err) {
    5037           0 :                 dev->destroy(dev);
    5038           0 :                 cb_fn(cb_arg, NULL, err);
    5039           0 :                 return;
    5040             :         }
    5041             : 
    5042         300 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5043         300 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5044         300 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5045         300 :         cpl.u.bs_handle.bs = bs;
    5046             : 
    5047         300 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5048         300 :         if (!ctx->seq) {
    5049           0 :                 spdk_free(ctx->super);
    5050           0 :                 free(ctx);
    5051           0 :                 bs_free(bs);
    5052           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5053           0 :                 return;
    5054             :         }
    5055             : 
    5056             :         /* Read the super block */
    5057         300 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5058         300 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5059             :                              bs_load_super_cpl, ctx);
    5060             : }
    5061             : 
    5062             : /* END spdk_bs_load */
    5063             : 
    5064             : /* START spdk_bs_dump */
    5065             : /* Tear down a dump operation.
                        *
                        * Frees ctx->super, stores bserrno and a copy of seq->cpl in
                        * bs->unload_err / bs->unload_cpl, sets seq->cpl.type to
                        * SPDK_BS_CPL_TYPE_NONE, finishes the sequence with status 0, then frees
                        * the blobstore and ctx.
                        */
    5066             : static void
    5067           0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
    5068             : {
    5069           0 :         spdk_free(ctx->super);
    5070             : 
    5071             :         /*
    5072             :          * We need to defer calling bs_call_cpl() until after
    5073             :          * dev destruction, so tuck these away for later use.
    5074             :          */
    5075           0 :         ctx->bs->unload_err = bserrno;
    5076           0 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5077           0 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5078             : 
    5079           0 :         bs_sequence_finish(seq, 0);
    5080           0 :         bs_free(ctx->bs);
    5081           0 :         free(ctx);
    5082           0 : }
    5083             : /* Print one XATTR or XATTR_INTERNAL descriptor to ctx->fp.
                        *
                        * The name is copied into ctx->xattr_name and NUL-terminated, the value is
                        * printed through ctx->print_xattr_fn, and the raw value bytes are then
                        * dumped in hex, 16 per line.
                        *
                        * NOTE(review): the descriptor-length consistency check below has an empty
                        * body, so a mismatch changes nothing. name_length is also used for the
                        * memcpy() into ctx->xattr_name without a bounds check visible here; the
                        * size of xattr_name is not shown in this chunk - confirm.
                        */
    5084             : static void
    5085           0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5086             : {
    5087             :         struct spdk_blob_md_descriptor_xattr *desc_xattr;
    5088             :         uint32_t i;
    5089             :         const char *type;
    5090             : 
    5091           0 :         desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
    5092             : 
    5093           0 :         if (desc_xattr->length !=
    5094             :             sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
    5095           0 :             desc_xattr->name_length + desc_xattr->value_length) {
    5096             :         }
    5097             : 
    5098           0 :         memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
    5099           0 :         ctx->xattr_name[desc_xattr->name_length] = '\0';
    5100           0 :         if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5101           0 :                 type = "XATTR";
    5102           0 :         } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5103           0 :                 type = "XATTR_INTERNAL";
    5104             :         } else {
    5105           0 :                 assert(false);
    5106             :                 type = "XATTR_?";
    5107             :         }
    5108           0 :         fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
    5109           0 :         fprintf(ctx->fp, "       value = \"");
    5110           0 :         ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
    5111           0 :                             (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
    5112           0 :                             desc_xattr->value_length);
    5113           0 :         fprintf(ctx->fp, "\"\n");
    5114           0 :         for (i = 0; i < desc_xattr->value_length; i++) {
    5115           0 :                 if (i % 16 == 0) {
    5116           0 :                         fprintf(ctx->fp, "               ");
    5117             :                 }
    5118           0 :                 fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
    5119           0 :                 if ((i + 1) % 16 == 0) {
    5120           0 :                         fprintf(ctx->fp, "\n");
    5121             :                 }
    5122             :         }
    5123           0 :         if (i % 16 != 0) {
    5124           0 :                 fprintf(ctx->fp, "\n");
    5125             :         }
    5126           0 : }
    5127             : /* One row of a flag-decoding table used by bs_dump_print_type_bits(): the
                        * row matches a flags word when (mask & flags) == val, and name is the
                        * label printed for it.
                        */
    5128             : struct type_flag_desc {
    5129             :         uint64_t mask;
    5130             :         uint64_t val;
    5131             :         const char *name;
    5132             : };
    5133             : /* Print, to ctx->fp, every entry of desc[0..numflags) that matches flags,
                        * i.e. (desc[i].mask & flags) == desc[i].val. The mask/value pair is
                        * printed as well when the two differ. Bits of flags not covered by any
                        * matching entry's mask are reported on a final "Unknown" line.
                        */
    5134             : static void
    5135           0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
    5136             :                         struct type_flag_desc *desc, size_t numflags)
    5137             : {
    5138           0 :         uint64_t covered = 0;
    5139             :         size_t i;
    5140             : 
    5141           0 :         for (i = 0; i < numflags; i++) {
    5142           0 :                 if ((desc[i].mask & flags) != desc[i].val) {
    5143           0 :                         continue;
    5144             :                 }
    5145           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
    5146           0 :                 if (desc[i].mask != desc[i].val) {
    5147           0 :                         fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
    5148           0 :                                 desc[i].mask, desc[i].val);
    5149             :                 }
    5150           0 :                 fprintf(ctx->fp, "\n");
    5151           0 :                 covered |= desc[i].mask;
    5152             :         }
    5153           0 :         if ((flags & ~covered) != 0) {
    5154           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
    5155             :         }
    5156           0 : }
    5157             : /* Print the three flag words of a FLAGS descriptor (invalid_flags,
                        * data_ro_flags, md_ro_flags) to ctx->fp, each followed by its decoded
                        * entries from the static tables below. ADD_FLAG builds a row whose mask
                        * and value are the same constant; ADD_MASK_VAL builds a row with a
                        * separate mask and value, labelled with the value's name.
                        */
    5158             : static void
    5159           0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5160             : {
    5161             :         struct spdk_blob_md_descriptor_flags *type_desc;
    5162             : #define ADD_FLAG(f) { f, f, #f }
    5163             : #define ADD_MASK_VAL(m, v) { m, v, #v }
    5164             :         static struct type_flag_desc invalid[] = {
    5165             :                 ADD_FLAG(SPDK_BLOB_THIN_PROV),
    5166             :                 ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
    5167             :                 ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
    5168             :         };
    5169             :         static struct type_flag_desc data_ro[] = {
    5170             :                 ADD_FLAG(SPDK_BLOB_READ_ONLY),
    5171             :         };
    5172             :         static struct type_flag_desc md_ro[] = {
    5173             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
    5174             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
    5175             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
    5176             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
    5177             :         };
    5178             : #undef ADD_FLAG
    5179             : #undef ADD_MASK_VAL
    5180             : 
    5181           0 :         type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
    5182           0 :         fprintf(ctx->fp, "Flags:\n");
    5183           0 :         fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
    5184           0 :         bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
    5185             :                                 SPDK_COUNTOF(invalid));
    5186           0 :         fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
    5187           0 :         bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
    5188             :                                 SPDK_COUNTOF(data_ro));
    5189           0 :         fprintf(ctx->fp, "\t  md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
    5190           0 :         bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
    5191             :                                 SPDK_COUNTOF(md_ro));
    5192           0 : }
    5193             : /* Print the allocated entries of an extent table descriptor to ctx->fp.
                        * The entry count is derived from the descriptor length minus the size of
                        * the num_clusters field, divided by the size of one extent_page entry.
                        * Entries with page_idx == 0 are skipped.
                        */
    5194             : static void
    5195           0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5196             : {
    5197             :         struct spdk_blob_md_descriptor_extent_table *et_desc;
    5198             :         uint64_t num_extent_pages;
    5199             :         uint32_t et_idx;
    5200             : 
    5201           0 :         et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
    5202           0 :         num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
    5203             :                            sizeof(et_desc->extent_page[0]);
    5204             : 
    5205           0 :         fprintf(ctx->fp, "Extent table:\n");
    5206           0 :         for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
    5207           0 :                 if (et_desc->extent_page[et_idx].page_idx == 0) {
    5208             :                         /* Zeroes represent unallocated extent pages. */
    5209           0 :                         continue;
    5210             :                 }
    5211           0 :                 fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
    5212             :                         " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
    5213             :                         et_desc->extent_page[et_idx].num_pages,
    5214             :                         bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
    5215             :         }
    5216           0 : }
    5217             : /* Print the md page held in ctx->page (index ctx->cur_page) to ctx->fp.
                        *
                        * Output: page index, start LBA, blob id, sequence number, next page,
                        * whether the page is set in bs->used_md_pages / bs->used_blobids, the
                        * stored CRC compared with blob_md_page_calc_crc(), and then each
                        * descriptor in turn. The descriptor walk stops at a PADDING descriptor of
                        * length 0, when cur_desc reaches sizeof(page->descriptors), or when the
                        * next descriptor header would not fit.
                        *
                        * NOTE(review): only the next header is bounds-checked; desc->length itself
                        * is taken from the page as-is, so the per-type printers may read past
                        * page->descriptors on a corrupt page - confirm whether that is acceptable
                        * for this dump path.
                        */
    5218             : static void
    5219           0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
    5220             : {
    5221           0 :         uint32_t page_idx = ctx->cur_page;
    5222           0 :         struct spdk_blob_md_page *page = ctx->page;
    5223             :         struct spdk_blob_md_descriptor *desc;
    5224           0 :         size_t cur_desc = 0;
    5225             :         uint32_t crc;
    5226             : 
    5227           0 :         fprintf(ctx->fp, "=========\n");
    5228           0 :         fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
    5229           0 :         fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
    5230           0 :         fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
    5231           0 :         fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
    5232           0 :         if (page->next == SPDK_INVALID_MD_PAGE) {
    5233           0 :                 fprintf(ctx->fp, "Next: None\n");
    5234             :         } else {
    5235           0 :                 fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
    5236             :         }
    5237           0 :         fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
    5238           0 :         if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
    5239           0 :                 fprintf(ctx->fp, " md");
    5240             :         }
    5241           0 :         if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
    5242           0 :                 fprintf(ctx->fp, " blob");
    5243             :         }
    5244           0 :         fprintf(ctx->fp, "\n");
    5245             : 
    5246           0 :         crc = blob_md_page_calc_crc(page);
    5247           0 :         fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
    5248             : 
    5249           0 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    5250           0 :         while (cur_desc < sizeof(page->descriptors)) {
    5251           0 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    5252           0 :                         if (desc->length == 0) {
    5253             :                                 /* If padding and length are 0, this terminates the page */
    5254           0 :                                 break;
    5255             :                         }
    5256           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    5257             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    5258             :                         unsigned int                            i;
    5259             : 
    5260           0 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    5261             : 
    5262           0 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    5263           0 :                                 if (desc_extent_rle->extents[i].cluster_idx != 0) {
    5264           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5265             :                                                 desc_extent_rle->extents[i].cluster_idx);
    5266             :                                 } else {
    5267           0 :                                         fprintf(ctx->fp, "Unallocated Extent - ");
    5268             :                                 }
    5269           0 :                                 fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
    5270           0 :                                 fprintf(ctx->fp, "\n");
    5271             :                         }
    5272           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    5273             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    5274             :                         unsigned int                                    i;
    5275             : 
    5276           0 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    5277             : 
    5278           0 :                         for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
    5279           0 :                                 if (desc_extent->cluster_idx[i] != 0) {
    5280           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5281             :                                                 desc_extent->cluster_idx[i]);
    5282             :                                 } else {
    5283           0 :                                         fprintf(ctx->fp, "Unallocated Extent");
    5284             :                                 }
    5285           0 :                                 fprintf(ctx->fp, "\n");
    5286             :                         }
    5287           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5288           0 :                         bs_dump_print_xattr(ctx, desc);
    5289           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5290           0 :                         bs_dump_print_xattr(ctx, desc);
    5291           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    5292           0 :                         bs_dump_print_type_flags(ctx, desc);
    5293           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    5294           0 :                         bs_dump_print_extent_table(ctx, desc);
    5295             :                 } else {
    5296             :                         /* Error */
    5297           0 :                         fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
    5298             :                 }
    5299             :                 /* Advance to the next descriptor */
    5300           0 :                 cur_desc += sizeof(*desc) + desc->length;
    5301           0 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    5302           0 :                         break;
    5303             :                 }
    5304           0 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    5305             :         }
    5306           0 : }
    5307             : 
    5308             : static void
    5309           0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5310             : {
    5311           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5312             : 
    5313           0 :         if (bserrno != 0) {
    5314           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5315           0 :                 return;
    5316             :         }
    5317             : 
    5318           0 :         if (ctx->page->id != 0) {
    5319           0 :                 bs_dump_print_md_page(ctx);
    5320             :         }
    5321             : 
    5322           0 :         ctx->cur_page++;
    5323             : 
    5324           0 :         if (ctx->cur_page < ctx->super->md_len) {
    5325           0 :                 bs_dump_read_md_page(seq, ctx);
    5326             :         } else {
    5327           0 :                 spdk_free(ctx->page);
    5328           0 :                 bs_dump_finish(seq, ctx, 0);
    5329             :         }
    5330             : }
    5331             : 
    5332             : static void
    5333           0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
    5334             : {
    5335           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5336             :         uint64_t lba;
    5337             : 
    5338           0 :         assert(ctx->cur_page < ctx->super->md_len);
    5339           0 :         lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
    5340           0 :         bs_sequence_read_dev(seq, ctx->page, lba,
    5341           0 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    5342             :                              bs_dump_read_md_page_cpl, ctx);
    5343           0 : }
    5344             : 
    5345             : static void
    5346           0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5347             : {
    5348           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5349             :         int rc;
    5350             : 
    5351           0 :         fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
    5352           0 :         if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5353             :                    sizeof(ctx->super->signature)) != 0) {
    5354           0 :                 fprintf(ctx->fp, "(Mismatch)\n");
    5355           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5356           0 :                 return;
    5357             :         } else {
    5358           0 :                 fprintf(ctx->fp, "(OK)\n");
    5359             :         }
    5360           0 :         fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
    5361           0 :         fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
    5362           0 :                 (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
    5363           0 :         fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
    5364           0 :         fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
    5365           0 :         fprintf(ctx->fp, "Super Blob ID: ");
    5366           0 :         if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
    5367           0 :                 fprintf(ctx->fp, "(None)\n");
    5368             :         } else {
    5369           0 :                 fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
    5370             :         }
    5371           0 :         fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
    5372           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
    5373           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
    5374           0 :         fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
    5375           0 :         fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
    5376           0 :         fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
    5377           0 :         fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
    5378           0 :         fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
    5379           0 :         fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
    5380             : 
    5381           0 :         ctx->cur_page = 0;
    5382           0 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    5383             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5384           0 :         if (!ctx->page) {
    5385           0 :                 bs_dump_finish(seq, ctx, -ENOMEM);
    5386           0 :                 return;
    5387             :         }
    5388             : 
    5389           0 :         rc = bs_parse_super(ctx);
    5390           0 :         if (rc < 0) {
    5391           0 :                 bs_load_ctx_fail(ctx, rc);
    5392           0 :                 return;
    5393             :         }
    5394             : 
    5395           0 :         bs_load_read_used_pages(ctx);
    5396             : }
    5397             : 
    5398             : void
    5399           0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
    5400             :              spdk_bs_op_complete cb_fn, void *cb_arg)
    5401             : {
    5402           0 :         struct spdk_blob_store  *bs;
    5403           0 :         struct spdk_bs_cpl      cpl;
    5404           0 :         struct spdk_bs_load_ctx *ctx;
    5405           0 :         struct spdk_bs_opts     opts = {};
    5406             :         int err;
    5407             : 
    5408           0 :         SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
    5409             : 
    5410           0 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5411             : 
    5412           0 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5413           0 :         if (err) {
    5414           0 :                 dev->destroy(dev);
    5415           0 :                 cb_fn(cb_arg, err);
    5416           0 :                 return;
    5417             :         }
    5418             : 
    5419           0 :         ctx->dumping = true;
    5420           0 :         ctx->fp = fp;
    5421           0 :         ctx->print_xattr_fn = print_xattr_fn;
    5422             : 
    5423           0 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5424           0 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5425           0 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5426             : 
    5427           0 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5428           0 :         if (!ctx->seq) {
    5429           0 :                 spdk_free(ctx->super);
    5430           0 :                 free(ctx);
    5431           0 :                 bs_free(bs);
    5432           0 :                 cb_fn(cb_arg, -ENOMEM);
    5433           0 :                 return;
    5434             :         }
    5435             : 
    5436             :         /* Read the super block */
    5437           0 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5438           0 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5439             :                              bs_dump_super_cpl, ctx);
    5440             : }
    5441             : 
    5442             : /* END spdk_bs_dump */
    5443             : 
    5444             : /* START spdk_bs_init */
    5445             : 
    5446             : static void
    5447         472 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5448             : {
    5449         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5450             : 
    5451         472 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    5452         472 :         spdk_free(ctx->super);
    5453         472 :         free(ctx);
    5454             : 
    5455         472 :         bs_sequence_finish(seq, bserrno);
    5456         472 : }
    5457             : 
    5458             : static void
    5459         472 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5460             : {
    5461         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5462             : 
    5463             :         /* Write super block */
    5464         472 :         bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    5465         472 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    5466             :                               bs_init_persist_super_cpl, ctx);
    5467         472 : }
    5468             : 
    5469             : void
    5470         488 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5471             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5472             : {
    5473         488 :         struct spdk_bs_load_ctx *ctx;
    5474         488 :         struct spdk_blob_store  *bs;
    5475         488 :         struct spdk_bs_cpl      cpl;
    5476             :         spdk_bs_sequence_t      *seq;
    5477             :         spdk_bs_batch_t         *batch;
    5478             :         uint64_t                num_md_lba;
    5479             :         uint64_t                num_md_pages;
    5480             :         uint64_t                num_md_clusters;
    5481             :         uint64_t                max_used_cluster_mask_len;
    5482             :         uint32_t                i;
    5483         488 :         struct spdk_bs_opts     opts = {};
    5484             :         int                     rc;
    5485             :         uint64_t                lba, lba_count;
    5486             : 
    5487         488 :         SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
    5488             : 
    5489         488 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5490           4 :                 SPDK_ERRLOG("unsupported dev block length of %d\n",
    5491             :                             dev->blocklen);
    5492           4 :                 dev->destroy(dev);
    5493           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5494           4 :                 return;
    5495             :         }
    5496             : 
    5497         484 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5498         484 :         if (o) {
    5499         182 :                 if (bs_opts_copy(o, &opts)) {
    5500           0 :                         return;
    5501             :                 }
    5502             :         }
    5503             : 
    5504         484 :         if (bs_opts_verify(&opts) != 0) {
    5505           4 :                 dev->destroy(dev);
    5506           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5507           4 :                 return;
    5508             :         }
    5509             : 
    5510         480 :         rc = bs_alloc(dev, &opts, &bs, &ctx);
    5511         480 :         if (rc) {
    5512           4 :                 dev->destroy(dev);
    5513           4 :                 cb_fn(cb_arg, NULL, rc);
    5514           4 :                 return;
    5515             :         }
    5516             : 
    5517         476 :         if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
    5518             :                 /* By default, allocate 1 page per cluster.
    5519             :                  * Technically, this over-allocates metadata
    5520             :                  * because more metadata will reduce the number
    5521             :                  * of usable clusters. This can be addressed with
    5522             :                  * more complex math in the future.
    5523             :                  */
    5524         468 :                 bs->md_len = bs->total_clusters;
    5525             :         } else {
    5526           8 :                 bs->md_len = opts.num_md_pages;
    5527             :         }
    5528         476 :         rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
    5529         476 :         if (rc < 0) {
    5530           0 :                 spdk_free(ctx->super);
    5531           0 :                 free(ctx);
    5532           0 :                 bs_free(bs);
    5533           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5534           0 :                 return;
    5535             :         }
    5536             : 
    5537         476 :         rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
    5538         476 :         if (rc < 0) {
    5539           0 :                 spdk_free(ctx->super);
    5540           0 :                 free(ctx);
    5541           0 :                 bs_free(bs);
    5542           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5543           0 :                 return;
    5544             :         }
    5545             : 
    5546         476 :         rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
    5547         476 :         if (rc < 0) {
    5548           0 :                 spdk_free(ctx->super);
    5549           0 :                 free(ctx);
    5550           0 :                 bs_free(bs);
    5551           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5552           0 :                 return;
    5553             :         }
    5554             : 
    5555         476 :         memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5556             :                sizeof(ctx->super->signature));
    5557         476 :         ctx->super->version = SPDK_BS_VERSION;
    5558         476 :         ctx->super->length = sizeof(*ctx->super);
    5559         476 :         ctx->super->super_blob = bs->super_blob;
    5560         476 :         ctx->super->clean = 0;
    5561         476 :         ctx->super->cluster_size = bs->cluster_sz;
    5562         476 :         ctx->super->io_unit_size = bs->io_unit_size;
    5563         476 :         memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
    5564             : 
    5565             :         /* Calculate how many pages the metadata consumes at the front
    5566             :          * of the disk.
    5567             :          */
    5568             : 
    5569             :         /* The super block uses 1 page */
    5570         476 :         num_md_pages = 1;
    5571             : 
    5572             :         /* The used_md_pages mask requires 1 bit per metadata page, rounded
    5573             :          * up to the nearest page, plus a header.
    5574             :          */
    5575         476 :         ctx->super->used_page_mask_start = num_md_pages;
    5576         476 :         ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5577         476 :                                          spdk_divide_round_up(bs->md_len, 8),
    5578             :                                          SPDK_BS_PAGE_SIZE);
    5579         476 :         num_md_pages += ctx->super->used_page_mask_len;
    5580             : 
    5581             :         /* The used_clusters mask requires 1 bit per cluster, rounded
    5582             :          * up to the nearest page, plus a header.
    5583             :          */
    5584         476 :         ctx->super->used_cluster_mask_start = num_md_pages;
    5585         476 :         ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5586         476 :                                             spdk_divide_round_up(bs->total_clusters, 8),
    5587             :                                             SPDK_BS_PAGE_SIZE);
    5588             :         /* The blobstore might be extended, then the used_cluster bitmap will need more space.
    5589             :          * Here we calculate the max clusters we can support according to the
    5590             :          * num_md_pages (bs->md_len).
    5591             :          */
    5592         476 :         max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5593         476 :                                     spdk_divide_round_up(bs->md_len, 8),
    5594             :                                     SPDK_BS_PAGE_SIZE);
    5595         476 :         max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
    5596             :                                              ctx->super->used_cluster_mask_len);
    5597         476 :         num_md_pages += max_used_cluster_mask_len;
    5598             : 
    5599             :         /* The used_blobids mask requires 1 bit per metadata page, rounded
    5600             :          * up to the nearest page, plus a header.
    5601             :          */
    5602         476 :         ctx->super->used_blobid_mask_start = num_md_pages;
    5603         476 :         ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5604         476 :                                            spdk_divide_round_up(bs->md_len, 8),
    5605             :                                            SPDK_BS_PAGE_SIZE);
    5606         476 :         num_md_pages += ctx->super->used_blobid_mask_len;
    5607             : 
    5608             :         /* The metadata region size was chosen above */
    5609         476 :         ctx->super->md_start = bs->md_start = num_md_pages;
    5610         476 :         ctx->super->md_len = bs->md_len;
    5611         476 :         num_md_pages += bs->md_len;
    5612             : 
    5613         476 :         num_md_lba = bs_page_to_lba(bs, num_md_pages);
    5614             : 
    5615         476 :         ctx->super->size = dev->blockcnt * dev->blocklen;
    5616             : 
    5617         476 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    5618             : 
    5619         476 :         num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
    5620         476 :         if (num_md_clusters > bs->total_clusters) {
    5621           4 :                 SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, "
    5622             :                             "please decrease number of pages reserved for metadata "
    5623             :                             "or increase cluster size.\n");
    5624           4 :                 spdk_free(ctx->super);
    5625           4 :                 spdk_bit_array_free(&ctx->used_clusters);
    5626           4 :                 free(ctx);
    5627           4 :                 bs_free(bs);
    5628           4 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5629           4 :                 return;
    5630             :         }
    5631             :         /* Claim all of the clusters used by the metadata */
    5632       75700 :         for (i = 0; i < num_md_clusters; i++) {
    5633       75228 :                 spdk_bit_array_set(ctx->used_clusters, i);
    5634             :         }
    5635             : 
    5636         472 :         bs->num_free_clusters -= num_md_clusters;
    5637         472 :         bs->total_data_clusters = bs->num_free_clusters;
    5638             : 
    5639         472 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5640         472 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5641         472 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5642         472 :         cpl.u.bs_handle.bs = bs;
    5643             : 
    5644         472 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5645         472 :         if (!seq) {
    5646           0 :                 spdk_free(ctx->super);
    5647           0 :                 free(ctx);
    5648           0 :                 bs_free(bs);
    5649           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5650           0 :                 return;
    5651             :         }
    5652             : 
    5653         472 :         batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
    5654             : 
    5655             :         /* Clear metadata space */
    5656         472 :         bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
    5657             : 
    5658         472 :         lba = num_md_lba;
    5659         472 :         lba_count = ctx->bs->dev->blockcnt - lba;
    5660         472 :         switch (opts.clear_method) {
    5661         456 :         case BS_CLEAR_WITH_UNMAP:
    5662             :                 /* Trim data clusters */
    5663         456 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    5664         456 :                 break;
    5665           0 :         case BS_CLEAR_WITH_WRITE_ZEROES:
    5666             :                 /* Write_zeroes to data clusters */
    5667           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    5668           0 :                 break;
    5669          16 :         case BS_CLEAR_WITH_NONE:
    5670             :         default:
    5671          16 :                 break;
    5672             :         }
    5673             : 
    5674         472 :         bs_batch_close(batch);
    5675             : }
    5676             : 
    5677             : /* END spdk_bs_init */
    5678             : 
    5679             : /* START spdk_bs_destroy */
    5680             : 
    5681             : static void
    5682           4 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5683             : {
    5684           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5685           4 :         struct spdk_blob_store *bs = ctx->bs;
    5686             : 
    5687             :         /*
    5688             :          * We need to defer calling bs_call_cpl() until after
    5689             :          * dev destruction, so tuck these away for later use.
    5690             :          */
    5691           4 :         bs->unload_err = bserrno;
    5692           4 :         memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5693           4 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5694             : 
    5695           4 :         bs_sequence_finish(seq, bserrno);
    5696             : 
    5697           4 :         bs_free(bs);
    5698           4 :         free(ctx);
    5699           4 : }
    5700             : 
    5701             : void
    5702           4 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
    5703             :                 void *cb_arg)
    5704             : {
    5705           4 :         struct spdk_bs_cpl      cpl;
    5706             :         spdk_bs_sequence_t      *seq;
    5707             :         struct spdk_bs_load_ctx *ctx;
    5708             : 
    5709           4 :         SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
    5710             : 
    5711           4 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5712           0 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5713           0 :                 cb_fn(cb_arg, -EBUSY);
    5714           0 :                 return;
    5715             :         }
    5716             : 
    5717           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5718           4 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5719           4 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5720             : 
    5721           4 :         ctx = calloc(1, sizeof(*ctx));
    5722           4 :         if (!ctx) {
    5723           0 :                 cb_fn(cb_arg, -ENOMEM);
    5724           0 :                 return;
    5725             :         }
    5726             : 
    5727           4 :         ctx->bs = bs;
    5728             : 
    5729           4 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5730           4 :         if (!seq) {
    5731           0 :                 free(ctx);
    5732           0 :                 cb_fn(cb_arg, -ENOMEM);
    5733           0 :                 return;
    5734             :         }
    5735             : 
    5736             :         /* Write zeroes to the super block */
    5737           4 :         bs_sequence_write_zeroes_dev(seq,
    5738             :                                      bs_page_to_lba(bs, 0),
    5739             :                                      bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
    5740             :                                      bs_destroy_trim_cpl, ctx);
    5741             : }
    5742             : 
    5743             : /* END spdk_bs_destroy */
    5744             : 
    5745             : /* START spdk_bs_unload */
    5746             : 
    5747             : static void
    5748         654 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
    5749             : {
    5750         654 :         spdk_bs_sequence_t *seq = ctx->seq;
    5751             : 
    5752         654 :         spdk_free(ctx->super);
    5753             : 
    5754             :         /*
    5755             :          * We need to defer calling bs_call_cpl() until after
    5756             :          * dev destruction, so tuck these away for later use.
    5757             :          */
    5758         654 :         ctx->bs->unload_err = bserrno;
    5759         654 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5760         654 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5761             : 
    5762         654 :         bs_sequence_finish(seq, bserrno);
    5763             : 
    5764         654 :         bs_free(ctx->bs);
    5765         654 :         free(ctx);
    5766         654 : }
    5767             : 
    5768             : static void
    5769         654 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5770             : {
    5771         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5772             : 
    5773         654 :         bs_unload_finish(ctx, bserrno);
    5774         654 : }
    5775             : 
    5776             : static void
    5777         654 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5778             : {
    5779         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5780             : 
    5781         654 :         spdk_free(ctx->mask);
    5782             : 
    5783         654 :         if (bserrno != 0) {
    5784           0 :                 bs_unload_finish(ctx, bserrno);
    5785           0 :                 return;
    5786             :         }
    5787             : 
    5788         654 :         ctx->super->clean = 1;
    5789             : 
    5790         654 :         bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
    5791             : }
    5792             : 
    5793             : static void
    5794         654 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5795             : {
    5796         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5797             : 
    5798         654 :         spdk_free(ctx->mask);
    5799         654 :         ctx->mask = NULL;
    5800             : 
    5801         654 :         if (bserrno != 0) {
    5802           0 :                 bs_unload_finish(ctx, bserrno);
    5803           0 :                 return;
    5804             :         }
    5805             : 
    5806         654 :         bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
    5807             : }
    5808             : 
    5809             : static void
    5810         654 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5811             : {
    5812         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5813             : 
    5814         654 :         spdk_free(ctx->mask);
    5815         654 :         ctx->mask = NULL;
    5816             : 
    5817         654 :         if (bserrno != 0) {
    5818           0 :                 bs_unload_finish(ctx, bserrno);
    5819           0 :                 return;
    5820             :         }
    5821             : 
    5822         654 :         bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
    5823             : }
    5824             : 
    5825             : static void /* Completion callback for the super block read started by spdk_bs_unload(): validates the super block and, if valid, starts bs_write_used_md(). Any failure ends the unload via bs_unload_finish(). */
    5826         654 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5827             : {
    5828         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5829             :         int rc;
    5830             : 
    5831         654 :         if (bserrno != 0) {
    5832           0 :                 bs_unload_finish(ctx, bserrno);
    5833           0 :                 return;
    5834             :         }
    5835             : 
    5836         654 :         rc = bs_super_validate(ctx->super, ctx->bs); /* non-zero rc aborts the unload before any mask is written */
    5837         654 :         if (rc != 0) {
    5838           0 :                 bs_unload_finish(ctx, rc);
    5839           0 :                 return;
    5840             :         }
    5841             : 
    5842         654 :         bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
    5843             : }
    5844             : 
    5845             : void /* Starts unloading the blobstore. cb_fn receives -EBUSY if blobs are still open or a deferred unload is already pending, -ENOMEM if an allocation or the sequence start fails; otherwise the super block is read and the sequence continues in bs_unload_read_super_cpl(). */
    5846         662 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
    5847             : {
    5848         662 :         struct spdk_bs_cpl      cpl;
    5849             :         struct spdk_bs_load_ctx *ctx;
    5850             : 
    5851         662 :         SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
    5852             : 
    5853             :         /*
    5854             :          * If external snapshot channels are being destroyed while the blobstore is unloaded, the
    5855             :          * unload is deferred until after the channel destruction completes.
    5856             :          */
    5857         662 :         if (bs->esnap_channels_unloading != 0) {
    5858           4 :                 if (bs->esnap_unload_cb_fn != NULL) { /* a deferred unload is already recorded; reject the second request */
    5859           0 :                         SPDK_ERRLOG("Blobstore unload in progress\n");
    5860           0 :                         cb_fn(cb_arg, -EBUSY);
    5861           0 :                         return;
    5862             :                 }
    5863           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
    5864             :                               " esnap clones are unloading\n", bs->esnap_channels_unloading);
    5865           4 :                 bs->esnap_unload_cb_fn = cb_fn; /* callback is only stored here; it is not invoked on this path */
    5866           4 :                 bs->esnap_unload_cb_arg = cb_arg;
    5867           4 :                 return;
    5868             :         }
    5869         658 :         if (bs->esnap_unload_cb_fn != NULL) { /* a previously deferred unload is being resumed with the stored callback */
    5870           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
    5871           4 :                 assert(bs->esnap_unload_cb_fn == cb_fn);
    5872           4 :                 assert(bs->esnap_unload_cb_arg == cb_arg);
    5873           4 :                 bs->esnap_unload_cb_fn = NULL;
    5874           4 :                 bs->esnap_unload_cb_arg = NULL;
    5875             :         }
    5876             : 
    5877         658 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5878           4 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5879           4 :                 cb_fn(cb_arg, -EBUSY);
    5880           4 :                 return;
    5881             :         }
    5882             : 
    5883         654 :         ctx = calloc(1, sizeof(*ctx));
    5884         654 :         if (!ctx) {
    5885           0 :                 cb_fn(cb_arg, -ENOMEM);
    5886           0 :                 return;
    5887             :         }
    5888             : 
    5889         654 :         ctx->bs = bs;
    5890             : 
    5891         654 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL, /* zeroed, 0x1000-aligned DMA buffer for the super block */
    5892             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5893         654 :         if (!ctx->super) {
    5894           0 :                 free(ctx);
    5895           0 :                 cb_fn(cb_arg, -ENOMEM);
    5896           0 :                 return;
    5897             :         }
    5898             : 
    5899         654 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5900         654 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5901         654 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5902             : 
    5903         654 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5904         654 :         if (!ctx->seq) {
    5905           0 :                 spdk_free(ctx->super);
    5906           0 :                 free(ctx);
    5907           0 :                 cb_fn(cb_arg, -ENOMEM);
    5908           0 :                 return;
    5909             :         }
    5910             : 
    5911             :         /* Read super block */
    5912         654 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5913         654 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5914             :                              bs_unload_read_super_cpl, ctx);
    5915             : }
    5916             : 
    5917             : /* END spdk_bs_unload */
    5918             : 
    5919             : /* START spdk_bs_set_super */
    5920             : 
    5921             : struct spdk_bs_set_super_ctx { /* Per-operation state for spdk_bs_set_super(); freed by the read/write completion callbacks. */
    5922             :         struct spdk_blob_store          *bs; /* blobstore whose super block is being updated */
    5923             :         struct spdk_bs_super_block      *super; /* buffer allocated with spdk_zmalloc() in spdk_bs_set_super() */
    5924             : };
    5925             : 
    5926             : static void /* Completion callback for the super block write in the set-super sequence: logs a failure, frees the super buffer, finishes the sequence with bserrno and frees the context. */
    5927           8 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5928             : {
    5929           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5930             : 
    5931           8 :         if (bserrno != 0) {
    5932           0 :                 SPDK_ERRLOG("Unable to write to super block of blobstore\n");
    5933             :         }
    5934             : 
    5935           8 :         spdk_free(ctx->super);
    5936             : 
    5937           8 :         bs_sequence_finish(seq, bserrno); /* the error, if any, is passed on through the sequence completion */
    5938             : 
    5939           8 :         free(ctx);
    5940           8 : }
    5941             : 
    5942             : static void /* Completion callback for the super block read in the set-super sequence: on read error or failed validation it cleans up and finishes the sequence with the error; otherwise it calls bs_write_super(). */
    5943           8 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5944             : {
    5945           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5946             :         int rc;
    5947             : 
    5948           8 :         if (bserrno != 0) {
    5949           0 :                 SPDK_ERRLOG("Unable to read super block of blobstore\n");
    5950           0 :                 spdk_free(ctx->super);
    5951           0 :                 bs_sequence_finish(seq, bserrno);
    5952           0 :                 free(ctx);
    5953           0 :                 return;
    5954             :         }
    5955             : 
    5956           8 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5957           8 :         if (rc != 0) {
    5958           0 :                 SPDK_ERRLOG("Not a valid super block\n");
    5959           0 :                 spdk_free(ctx->super);
    5960           0 :                 bs_sequence_finish(seq, rc);
    5961           0 :                 free(ctx);
    5962           0 :                 return;
    5963             :         }
    5964             : 
    5965           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx); /* ctx ownership moves to bs_set_super_write_cpl() */
    5966             : }
    5967             : 
    5968             : void /* Records blobid as the blobstore's super blob and starts a read/validate/write sequence on the super block. cb_fn receives -ENOMEM directly if an allocation or the sequence start fails; otherwise it is invoked through the sequence completion. */
    5969           8 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
    5970             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    5971             : {
    5972           8 :         struct spdk_bs_cpl              cpl;
    5973             :         spdk_bs_sequence_t              *seq;
    5974             :         struct spdk_bs_set_super_ctx    *ctx;
    5975             : 
    5976           8 :         SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
    5977             : 
    5978           8 :         ctx = calloc(1, sizeof(*ctx));
    5979           8 :         if (!ctx) {
    5980           0 :                 cb_fn(cb_arg, -ENOMEM);
    5981           0 :                 return;
    5982             :         }
    5983             : 
    5984           8 :         ctx->bs = bs;
    5985             : 
    5986           8 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5987             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5988           8 :         if (!ctx->super) {
    5989           0 :                 free(ctx);
    5990           0 :                 cb_fn(cb_arg, -ENOMEM);
    5991           0 :                 return;
    5992             :         }
    5993             : 
    5994           8 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5995           8 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5996           8 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5997             : 
    5998           8 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5999           8 :         if (!seq) {
    6000           0 :                 spdk_free(ctx->super);
    6001           0 :                 free(ctx);
    6002           0 :                 cb_fn(cb_arg, -ENOMEM);
    6003           0 :                 return;
    6004             :         }
    6005             : 
    6006           8 :         bs->super_blob = blobid; /* NOTE(review): the in-memory id changes before the super block is read or written, and the callbacks in this sequence do not restore it on failure - confirm this is intended */
    6007             : 
    6008             :         /* Read super block */
    6009           8 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    6010           8 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    6011             :                              bs_set_super_read_cpl, ctx);
    6012             : }
    6013             : 
    6014             : /* END spdk_bs_set_super */
    6015             : 
    6016             : void /* Reports the in-memory super blob id through cb_fn, which is called synchronously: (SPDK_BLOBID_INVALID, -ENOENT) when no super blob is set, otherwise (bs->super_blob, 0). */
    6017          12 : spdk_bs_get_super(struct spdk_blob_store *bs,
    6018             :                   spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6019             : {
    6020          12 :         if (bs->super_blob == SPDK_BLOBID_INVALID) {
    6021           4 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
    6022             :         } else {
    6023           8 :                 cb_fn(cb_arg, bs->super_blob, 0);
    6024             :         }
    6025          12 : }
    6026             : 
    6027             : uint64_t /* Returns the blobstore's cluster_sz field. */
    6028         132 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
    6029             : {
    6030         132 :         return bs->cluster_sz;
    6031             : }
    6032             : 
    6033             : uint64_t /* Returns the compile-time constant SPDK_BS_PAGE_SIZE; the bs argument is not read. */
    6034          68 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
    6035             : {
    6036          68 :         return SPDK_BS_PAGE_SIZE;
    6037             : }
    6038             : 
    6039             : uint64_t /* Returns the blobstore's io_unit_size field. */
    6040         734 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
    6041             : {
    6042         734 :         return bs->io_unit_size;
    6043             : }
    6044             : 
    6045             : uint64_t /* Returns the blobstore's num_free_clusters field. */
    6046         540 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
    6047             : {
    6048         540 :         return bs->num_free_clusters;
    6049             : }
    6050             : 
    6051             : uint64_t /* Returns the blobstore's total_data_clusters field. */
    6052          92 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
    6053             : {
    6054          92 :         return bs->total_data_clusters;
    6055             : }
    6056             : 
    6057             : static int /* Obtains an I/O channel for bs via spdk_get_io_channel() and stores it in bs->md_channel. Returns 0 on success, -1 (not an errno value) if no channel could be obtained. */
    6058         780 : bs_register_md_thread(struct spdk_blob_store *bs)
    6059             : {
    6060         780 :         bs->md_channel = spdk_get_io_channel(bs);
    6061         780 :         if (!bs->md_channel) {
    6062           0 :                 SPDK_ERRLOG("Failed to get IO channel.\n");
    6063           0 :                 return -1;
    6064             :         }
    6065             : 
    6066         780 :         return 0;
    6067             : }
    6068             : 
    6069             : static int /* Releases bs->md_channel with spdk_put_io_channel(). Always returns 0; bs->md_channel itself is left unchanged. */
    6070         780 : bs_unregister_md_thread(struct spdk_blob_store *bs)
    6071             : {
    6072         780 :         spdk_put_io_channel(bs->md_channel);
    6073             : 
    6074         780 :         return 0;
    6075             : }
    6076             : 
    6077             : spdk_blob_id /* Returns blob->id. blob must not be NULL (checked only by assert). */
    6078         562 : spdk_blob_get_id(struct spdk_blob *blob)
    6079             : {
    6080         562 :         assert(blob != NULL);
    6081             : 
    6082         562 :         return blob->id;
    6083             : }
    6084             : 
    6085             : uint64_t /* Returns the blob's active cluster count converted by bs_cluster_to_page(). blob must not be NULL (checked only by assert). */
    6086          24 : spdk_blob_get_num_pages(struct spdk_blob *blob)
    6087             : {
    6088          24 :         assert(blob != NULL);
    6089             : 
    6090          24 :         return bs_cluster_to_page(blob->bs, blob->active.num_clusters);
    6091             : }
    6092             : 
    6093             : uint64_t /* Returns spdk_blob_get_num_pages(blob) multiplied by bs_io_unit_per_page() for the blob's blobstore. blob must not be NULL (checked only by assert). */
    6094          24 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
    6095             : {
    6096          24 :         assert(blob != NULL);
    6097             : 
    6098          24 :         return spdk_blob_get_num_pages(blob) * bs_io_unit_per_page(blob->bs);
    6099             : }
    6100             : 
    6101             : uint64_t /* Returns blob->active.num_clusters. blob must not be NULL (checked only by assert). */
    6102         565 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
    6103             : {
    6104         565 :         assert(blob != NULL);
    6105             : 
    6106         565 :         return blob->active.num_clusters;
    6107             : }
    6108             : 
    6109             : uint64_t /* Returns blob->active.num_allocated_clusters. blob must not be NULL (checked only by assert). */
    6110         330 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
    6111             : {
    6112         330 :         assert(blob != NULL);
    6113             : 
    6114         330 :         return blob->active.num_allocated_clusters;
    6115             : }
    6116             : 
    6117             : static uint64_t /* Searches from io unit `offset` for the first io unit whose allocation state (per bs_io_unit_is_allocated()) equals is_allocated. Returns that io unit index, or UINT64_MAX if the end of the blob is reached first. */
    6118          24 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
    6119             : {
    6120          24 :         uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
    6121             : 
    6122          44 :         while (offset < blob_io_unit_num) {
    6123          40 :                 if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
    6124          20 :                         return offset;
    6125             :                 }
    6126             : 
    6127          20 :                 offset += bs_num_io_units_to_cluster_boundary(blob, offset); /* no match here: skip ahead by the distance to the cluster boundary instead of one io unit at a time */
    6128             :         }
    6129             : 
    6130           4 :         return UINT64_MAX;
    6131             : }
    6132             : 
    6133             : uint64_t /* Returns the first allocated io unit at or after offset, or UINT64_MAX if there is none (see blob_find_io_unit()). */
    6134          12 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6135             : {
    6136          12 :         return blob_find_io_unit(blob, offset, true);
    6137             : }
    6138             : 
    6139             : uint64_t /* Returns the first unallocated io unit at or after offset, or UINT64_MAX if there is none (see blob_find_io_unit()). */
    6140          12 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6141             : {
    6142          12 :         return blob_find_io_unit(blob, offset, false);
    6143             : }
    6144             : 
    6145             : /* START spdk_bs_create_blob */
    6146             : 
    6147             : static void /* Completion callback for blob_persist() in bs_create_blob(): on error it undoes the blob id / md page claim under used_lock; in all cases it frees the in-memory blob and finishes the sequence with bserrno. */
    6148        1874 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    6149             : {
    6150        1874 :         struct spdk_blob *blob = cb_arg;
    6151        1874 :         uint32_t page_idx = bs_blobid_to_page(blob->id);
    6152             : 
    6153        1874 :         if (bserrno != 0) {
    6154           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    6155           0 :                 spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
    6156           0 :                 bs_release_md_page(blob->bs, page_idx);
    6157           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    6158             :         }
    6159             : 
    6160        1874 :         blob_free(blob); /* the blob object is freed on success as well as on failure */
    6161             : 
    6162        1874 :         bs_sequence_finish(seq, bserrno);
    6163        1874 : }
    6164             : 
    6165             : static int /* Sets every xattr named in xattrs->names on blob, fetching each value through xattrs->get_value. Returns 0 on success, -EINVAL if names are given without a get_value callback or a value is NULL/empty, or the negative rc from blob_set_xattr(). */
    6166        3768 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
    6167             :                 bool internal)
    6168             : {
    6169             :         uint64_t i;
    6170        3768 :         size_t value_len = 0;
    6171             :         int rc;
    6172        3768 :         const void *value = NULL;
    6173        3768 :         if (xattrs->count > 0 && xattrs->get_value == NULL) {
    6174           8 :                 return -EINVAL;
    6175             :         }
    6176        4072 :         for (i = 0; i < xattrs->count; i++) {
    6177         316 :                 xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len); /* NOTE(review): value/value_len are not reset per iteration, so a callback that leaves them untouched would reuse the previous xattr's value - confirm callers always set both */
    6178         316 :                 if (value == NULL || value_len == 0) {
    6179           4 :                         return -EINVAL;
    6180             :                 }
    6181         312 :                 rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
    6182         312 :                 if (rc < 0) {
    6183           0 :                         return rc; /* xattrs set by earlier iterations are not rolled back here */
    6184             :                 }
    6185             :         }
    6186        3756 :         return 0;
    6187             : }
    6188             : 
    6189             : static void /* Copies options from src to dst field by field, skipping any field that does not fit entirely within src->opts_size, then sets dst->opts_size to src->opts_size. Skipped fields keep whatever dst already held. */
    6190        1858 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
    6191             : {
    6192             : #define FIELD_OK(field) \
    6193             :         offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
    6194             : 
    6195             : #define SET_FIELD(field) \
    6196             :         if (FIELD_OK(field)) { \
    6197             :                 dst->field = src->field; \
    6198             :         } \
    6199             : 
    6200        1858 :         SET_FIELD(num_clusters);
    6201        1858 :         SET_FIELD(thin_provision);
    6202        1858 :         SET_FIELD(clear_method);
    6203             : 
    6204        1858 :         if (FIELD_OK(xattrs)) { /* xattrs is copied with memcpy() rather than SET_FIELD's assignment */
    6205        1858 :                 memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
    6206             :         }
    6207             : 
    6208        1858 :         SET_FIELD(use_extent_table);
    6209        1858 :         SET_FIELD(esnap_id);
    6210        1858 :         SET_FIELD(esnap_id_len);
    6211             : 
    6212        1858 :         dst->opts_size = src->opts_size;
    6213             : 
    6214             :         /* You should not remove this statement, but need to update the assert statement
    6215             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    6216             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
    6217             : 
    6218             : #undef FIELD_OK
    6219             : #undef SET_FIELD
    6220        1858 : }
    6221             : 
    6222             : static void /* Creates a new blob: claims a free md page / blob id, builds the in-memory blob from opts (defaults when opts is NULL) and internal_xattrs, then persists it. On success cb_fn is invoked through the sequence completion with the new blob id; on any failure the claim is undone and cb_fn(cb_arg, 0, rc) is called with a negative errno. Must run on the md thread (asserted). */
    6223        1890 : bs_create_blob(struct spdk_blob_store *bs,
    6224             :                const struct spdk_blob_opts *opts,
    6225             :                const struct spdk_blob_xattr_opts *internal_xattrs,
    6226             :                spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6227             : {
    6228             :         struct spdk_blob        *blob;
    6229             :         uint32_t                page_idx;
    6230        1890 :         struct spdk_bs_cpl      cpl;
    6231        1890 :         struct spdk_blob_opts   opts_local;
    6232        1890 :         struct spdk_blob_xattr_opts internal_xattrs_default;
    6233             :         spdk_bs_sequence_t      *seq;
    6234             :         spdk_blob_id            id;
    6235             :         int rc;
    6236             : 
    6237        1890 :         assert(spdk_get_thread() == bs->md_thread);
    6238             : 
    6239        1890 :         spdk_spin_lock(&bs->used_lock);
    6240        1890 :         page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
    6241        1890 :         if (page_idx == UINT32_MAX) { /* no free md page available */
    6242           0 :                 spdk_spin_unlock(&bs->used_lock);
    6243           0 :                 cb_fn(cb_arg, 0, -ENOMEM);
    6244           0 :                 return;
    6245             :         }
    6246        1890 :         spdk_bit_array_set(bs->used_blobids, page_idx); /* blob id and md page are claimed together under used_lock */
    6247        1890 :         bs_claim_md_page(bs, page_idx);
    6248        1890 :         spdk_spin_unlock(&bs->used_lock);
    6249             : 
    6250        1890 :         id = bs_page_to_blobid(page_idx);
    6251             : 
    6252        1890 :         SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
    6253             : 
    6254        1890 :         spdk_blob_opts_init(&opts_local, sizeof(opts_local)); /* start from defaults, then overlay the caller's options */
    6255        1890 :         if (opts) {
    6256        1858 :                 blob_opts_copy(opts, &opts_local);
    6257             :         }
    6258             : 
    6259        1890 :         blob = blob_alloc(bs, id);
    6260        1890 :         if (!blob) {
    6261           0 :                 rc = -ENOMEM;
    6262           0 :                 goto error;
    6263             :         }
    6264             : 
    6265        1890 :         blob->use_extent_table = opts_local.use_extent_table;
    6266        1890 :         if (blob->use_extent_table) {
    6267         966 :                 blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
    6268             :         }
    6269             : 
    6270        1890 :         if (!internal_xattrs) {
    6271        1622 :                 blob_xattrs_init(&internal_xattrs_default);
    6272        1622 :                 internal_xattrs = &internal_xattrs_default;
    6273             :         }
    6274             : 
    6275        1890 :         rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
    6276        1890 :         if (rc < 0) {
    6277          12 :                 goto error;
    6278             :         }
    6279             : 
    6280        1878 :         rc = blob_set_xattrs(blob, internal_xattrs, true);
    6281        1878 :         if (rc < 0) {
    6282           0 :                 goto error;
    6283             :         }
    6284             : 
    6285        1878 :         if (opts_local.thin_provision) {
    6286         352 :                 blob_set_thin_provision(blob);
    6287             :         }
    6288             : 
    6289        1878 :         blob_set_clear_method(blob, opts_local.clear_method);
    6290             : 
    6291        1878 :         if (opts_local.esnap_id != NULL) {
    6292          60 :                 if (opts_local.esnap_id_len > UINT16_MAX) {
    6293           0 :                         SPDK_ERRLOG("esnap id length %" PRIu64 " is too long\n",
    6294             :                                     opts_local.esnap_id_len);
    6295           0 :                         rc = -EINVAL;
    6296           0 :                         goto error;
    6297             : 
    6298             :                 }
    6299          60 :                 blob_set_thin_provision(blob); /* a blob with an external snapshot id is always made thin provisioned */
    6300          60 :                 blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6301          60 :                 rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
    6302          60 :                                     opts_local.esnap_id, opts_local.esnap_id_len, true);
    6303          60 :                 if (rc != 0) {
    6304           0 :                         goto error;
    6305             :                 }
    6306             :         }
    6307             : 
    6308        1878 :         rc = blob_resize(blob, opts_local.num_clusters);
    6309        1878 :         if (rc < 0) {
    6310           4 :                 goto error;
    6311             :         }
    6312        1874 :         cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6313        1874 :         cpl.u.blobid.cb_fn = cb_fn;
    6314        1874 :         cpl.u.blobid.cb_arg = cb_arg;
    6315        1874 :         cpl.u.blobid.blobid = blob->id;
    6316             : 
    6317        1874 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6318        1874 :         if (!seq) {
    6319           0 :                 rc = -ENOMEM;
    6320           0 :                 goto error;
    6321             :         }
    6322             : 
    6323        1874 :         blob_persist(seq, blob, bs_create_blob_cpl, blob); /* bs_create_blob_cpl() frees blob and finishes the sequence */
    6324        1874 :         return;
    6325             : 
    6326          16 : error:
    6327          16 :         SPDK_ERRLOG("Failed to create blob: %s, size in clusters/size: %" PRIu64 " (clusters)\n",
    6328             :                     spdk_strerror(-rc), opts_local.num_clusters); /* rc is a negative errno on every path here; spdk_strerror() takes the positive value */
    6329          16 :         if (blob != NULL) {
    6330          16 :                 blob_free(blob);
    6331             :         }
    6332          16 :         spdk_spin_lock(&bs->used_lock); /* undo the blob id / md page claim taken at the top */
    6333          16 :         spdk_bit_array_clear(bs->used_blobids, page_idx);
    6334          16 :         bs_release_md_page(bs, page_idx);
    6335          16 :         spdk_spin_unlock(&bs->used_lock);
    6336          16 :         cb_fn(cb_arg, 0, rc);
    6337             : }
    6338             : 
    6339             : void /* Creates a blob with default options: forwards to bs_create_blob() with NULL opts and NULL internal xattrs. */
    6340          16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
    6341             :                     spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6342             : {
    6343          16 :         bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
    6344          16 : }
    6345             : 
    6346             : void /* Creates a blob with caller-supplied options: forwards to bs_create_blob() with opts and NULL internal xattrs. */
    6347        1598 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
    6348             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6349             : {
    6350        1598 :         bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
    6351        1598 : }
    6352             : 
    6353             : /* END spdk_bs_create_blob */
    6354             : 
    6355             : /* START blob_cleanup */
    6356             : 
    6357             : struct spdk_clone_snapshot_ctx { /* Shared state for the clone/snapshot cleanup callbacks; freed in bs_clone_snapshot_cleanup_finish(). */
    6358             :         struct spdk_bs_cpl      cpl; /* user completion, dispatched by type in bs_clone_snapshot_cleanup_finish() */
    6359             :         int bserrno; /* first error recorded by the cleanup callbacks; later errors are only logged */
    6360             :         bool frozen; /* when true, origblob cleanup calls blob_unfreeze_io() before closing the blob */
    6361             : 
    6362             :         struct spdk_io_channel *channel;
    6363             : 
    6364             :         /* Current cluster for inflate operation */
    6365             :         uint64_t cluster;
    6366             : 
    6367             :         /* For inflation force allocation of all unallocated clusters and remove
    6368             :          * thin-provisioning. Otherwise only decouple parent and keep clone thin. */
    6369             :         bool allocate_all;
    6370             : 
    6371             :         struct {
    6372             :                 spdk_blob_id id;
    6373             :                 struct spdk_blob *blob;
    6374             :                 bool md_ro; /* value restored into origblob->md_ro by bs_snapshot_unfreeze_cpl() */
    6375             :         } original;
    6376             :         struct {
    6377             :                 spdk_blob_id id;
    6378             :                 struct spdk_blob *blob;
    6379             :         } new;
    6380             : 
    6381             :         /* xattrs specified for snapshot/clones only. They have no impact on
    6382             :          * the original blobs xattrs. */
    6383             :         const struct spdk_blob_xattr_opts *xattrs;
    6384             : };
    6385             : 
    6386             : static void /* Final cleanup step: records or logs bserrno, invokes the user completion stored in ctx->cpl with ctx->bserrno, and frees ctx. */
    6387         338 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
    6388             : {
    6389         338 :         struct spdk_clone_snapshot_ctx *ctx = cb_arg;
    6390         338 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    6391             : 
    6392         338 :         if (bserrno != 0) {
    6393           6 :                 if (ctx->bserrno != 0) { /* keep the first recorded error; a later one is only logged */
    6394           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6395             :                 } else {
    6396           6 :                         ctx->bserrno = bserrno;
    6397             :                 }
    6398             :         }
    6399             : 
    6400         338 :         switch (cpl->type) {
    6401         278 :         case SPDK_BS_CPL_TYPE_BLOBID:
    6402         278 :                 cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
    6403         278 :                 break;
    6404          60 :         case SPDK_BS_CPL_TYPE_BLOB_BASIC:
    6405          60 :                 cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    6406          60 :                 break;
    6407           0 :         default:
    6408           0 :                 SPDK_UNREACHABLE(); /* only the two completion types above are handled here */
    6409             :                 break;
    6410             :         }
    6411             : 
    6412         338 :         free(ctx);
    6413         338 : }
    6414             : 
    6415             : static void /* Runs after I/O on the original blob is unfrozen (or directly when it was never frozen): records or logs bserrno, clears the locked-operation flag, restores md_ro and closes the original blob, continuing in bs_clone_snapshot_cleanup_finish(). */
    6416         324 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    6417             : {
    6418         324 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6419         324 :         struct spdk_blob *origblob = ctx->original.blob;
    6420             : 
    6421         324 :         if (bserrno != 0) {
    6422           0 :                 if (ctx->bserrno != 0) { /* keep the first recorded error; a later one is only logged */
    6423           0 :                         SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
    6424             :                 } else {
    6425           0 :                         ctx->bserrno = bserrno;
    6426             :                 }
    6427             :         }
    6428             : 
    6429         324 :         ctx->original.id = origblob->id;
    6430         324 :         origblob->locked_operation_in_progress = false;
    6431             : 
    6432             :         /* Revert md_ro to original state */
    6433         324 :         origblob->md_ro = ctx->original.md_ro;
    6434             : 
    6435         324 :         spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
    6436         324 : }
    6437             : 
    6438             : static void /* Cleanup step for the original blob: records or logs bserrno, then unfreezes I/O if ctx->frozen is set, otherwise goes straight to bs_snapshot_unfreeze_cpl(). */
    6439         324 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
    6440             : {
    6441         324 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6442         324 :         struct spdk_blob *origblob = ctx->original.blob;
    6443             : 
    6444         324 :         if (bserrno != 0) {
    6445          24 :                 if (ctx->bserrno != 0) { /* keep the first recorded error; a later one is only logged */
    6446           4 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6447             :                 } else {
    6448          20 :                         ctx->bserrno = bserrno;
    6449             :                 }
    6450             :         }
    6451             : 
    6452         324 :         if (ctx->frozen) {
    6453             :                 /* Unfreeze any outstanding I/O */
    6454         208 :                 blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
    6455             :         } else {
    6456         116 :                 bs_snapshot_unfreeze_cpl(ctx, 0); /* nothing to unfreeze: call the completion directly with success */
    6457             :         }
    6458             : 
    6459         324 : }
    6460             : 
    6461             : static void
    6462           4 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
    6463             : {
    6464           4 :         struct spdk_blob *newblob = ctx->new.blob;
    6465             : 
    6466           4 :         if (bserrno != 0) {
    6467           4 :                 if (ctx->bserrno != 0) {
    6468           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6469             :                 } else {
    6470           4 :                         ctx->bserrno = bserrno;
    6471             :                 }
    6472             :         }
    6473             : 
    6474           4 :         ctx->new.id = newblob->id;
    6475           4 :         spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6476           4 : }
    6477             : 
    6478             : /* END blob_cleanup */
    6479             : 
    6480             : /* START spdk_bs_create_snapshot */
    6481             : 
    6482             : static void
    6483         216 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
    6484             : {
    6485             :         uint64_t *cluster_temp;
    6486             :         uint64_t num_allocated_clusters_temp;
    6487             :         uint32_t *extent_page_temp;
    6488             : 
    6489         216 :         cluster_temp = blob1->active.clusters;
    6490         216 :         blob1->active.clusters = blob2->active.clusters;
    6491         216 :         blob2->active.clusters = cluster_temp;
    6492             : 
    6493         216 :         num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
    6494         216 :         blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
    6495         216 :         blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
    6496             : 
    6497         216 :         extent_page_temp = blob1->active.extent_pages;
    6498         216 :         blob1->active.extent_pages = blob2->active.extent_pages;
    6499         216 :         blob2->active.extent_pages = extent_page_temp;
    6500         216 : }
    6501             : 
    6502             : /* Copies an internal xattr */
    6503             : static int
    6504          20 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
    6505             : {
    6506          20 :         const void      *val = NULL;
    6507          20 :         size_t          len;
    6508             :         int             bserrno;
    6509             : 
    6510          20 :         bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
    6511          20 :         if (bserrno != 0) {
    6512           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
    6513           0 :                 return bserrno;
    6514             :         }
    6515             : 
    6516          20 :         bserrno = blob_set_xattr(toblob, name, val, len, true);
    6517          20 :         if (bserrno != 0) {
    6518           0 :                 SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
    6519             :                             name, toblob->id);
    6520           0 :                 return bserrno;
    6521             :         }
    6522          20 :         return 0;
    6523             : }
    6524             : 
    6525             : static void
    6526         204 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
    6527             : {
    6528         204 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6529         204 :         struct spdk_blob *origblob = ctx->original.blob;
    6530         204 :         struct spdk_blob *newblob = ctx->new.blob;
    6531             : 
    6532         204 :         if (bserrno != 0) {
    6533           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6534           4 :                 if (blob_is_esnap_clone(newblob)) {
    6535           0 :                         bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6536           0 :                         origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6537             :                 }
    6538           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6539           4 :                 return;
    6540             :         }
    6541             : 
    6542             :         /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
    6543         200 :         bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
    6544         200 :         if (bserrno != 0) {
    6545           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6546           0 :                 return;
    6547             :         }
    6548             : 
    6549         200 :         bs_blob_list_add(ctx->original.blob);
    6550             : 
    6551         200 :         spdk_blob_set_read_only(newblob);
    6552             : 
    6553             :         /* sync snapshot metadata */
    6554         200 :         spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6555             : }
    6556             : 
    6557             : static void
    6558         208 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
    6559             : {
    6560         208 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6561         208 :         struct spdk_blob *origblob = ctx->original.blob;
    6562         208 :         struct spdk_blob *newblob = ctx->new.blob;
    6563             : 
    6564         208 :         if (bserrno != 0) {
    6565             :                 /* return cluster map back to original */
    6566           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6567             : 
    6568             :                 /* Newblob md sync failed. Valid clusters are only present in origblob.
    6569             :                  * Since I/O is frozen on origblob, not changes to zeroed out cluster map should have occurred.
    6570             :                  * Newblob needs to be reverted to thin_provisioned state at creation to properly close. */
    6571           4 :                 blob_set_thin_provision(newblob);
    6572           4 :                 assert(spdk_mem_all_zero(newblob->active.clusters,
    6573             :                                          newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6574           4 :                 assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6575             :                                          newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6576             : 
    6577           4 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6578           4 :                 return;
    6579             :         }
    6580             : 
    6581             :         /* Set internal xattr for snapshot id */
    6582         204 :         bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
    6583         204 :         if (bserrno != 0) {
    6584             :                 /* return cluster map back to original */
    6585           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6586           0 :                 blob_set_thin_provision(newblob);
    6587           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6588           0 :                 return;
    6589             :         }
    6590             : 
    6591             :         /* Create new back_bs_dev for snapshot */
    6592         204 :         origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
    6593         204 :         if (origblob->back_bs_dev == NULL) {
    6594             :                 /* return cluster map back to original */
    6595           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6596           0 :                 blob_set_thin_provision(newblob);
    6597           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
    6598           0 :                 return;
    6599             :         }
    6600             : 
    6601             :         /* Remove the xattr that references an external snapshot */
    6602         204 :         if (blob_is_esnap_clone(origblob)) {
    6603          12 :                 origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6604          12 :                 bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6605          12 :                 if (bserrno != 0) {
    6606           0 :                         if (bserrno == -ENOENT) {
    6607           0 :                                 SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
    6608             :                                             " xattr to remove\n", origblob->id);
    6609           0 :                                 assert(false);
    6610             :                         } else {
    6611             :                                 /* return cluster map back to original */
    6612           0 :                                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6613           0 :                                 blob_set_thin_provision(newblob);
    6614           0 :                                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6615           0 :                                 return;
    6616             :                         }
    6617             :                 }
    6618             :         }
    6619             : 
    6620         204 :         bs_blob_list_remove(origblob);
    6621         204 :         origblob->parent_id = newblob->id;
    6622             :         /* set clone blob as thin provisioned */
    6623         204 :         blob_set_thin_provision(origblob);
    6624             : 
    6625         204 :         bs_blob_list_add(newblob);
    6626             : 
    6627             :         /* sync clone metadata */
    6628         204 :         spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
    6629             : }
    6630             : 
    6631             : static void
    6632         208 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
    6633             : {
    6634         208 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6635         208 :         struct spdk_blob *origblob = ctx->original.blob;
    6636         208 :         struct spdk_blob *newblob = ctx->new.blob;
    6637             :         int bserrno;
    6638             : 
    6639         208 :         if (rc != 0) {
    6640           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, rc);
    6641           0 :                 return;
    6642             :         }
    6643             : 
    6644         208 :         ctx->frozen = true;
    6645             : 
    6646         208 :         if (blob_is_esnap_clone(origblob)) {
    6647             :                 /* Clean up any channels associated with the original blob id because future IO will
    6648             :                  * perform IO using the snapshot blob_id.
    6649             :                  */
    6650          12 :                 blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
    6651             :         }
    6652         208 :         if (newblob->back_bs_dev) {
    6653         208 :                 blob_back_bs_destroy(newblob);
    6654             :         }
    6655             :         /* set new back_bs_dev for snapshot */
    6656         208 :         newblob->back_bs_dev = origblob->back_bs_dev;
    6657             :         /* Set invalid flags from origblob */
    6658         208 :         newblob->invalid_flags = origblob->invalid_flags;
    6659             : 
    6660             :         /* inherit parent from original blob if set */
    6661         208 :         newblob->parent_id = origblob->parent_id;
    6662         208 :         switch (origblob->parent_id) {
    6663          12 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    6664          12 :                 bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6665          12 :                 if (bserrno != 0) {
    6666           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6667           0 :                         return;
    6668             :                 }
    6669          12 :                 break;
    6670         144 :         case SPDK_BLOBID_INVALID:
    6671         144 :                 break;
    6672          52 :         default:
    6673             :                 /* Set internal xattr for snapshot id */
    6674          52 :                 bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
    6675          52 :                                          &origblob->parent_id, sizeof(spdk_blob_id), true);
    6676          52 :                 if (bserrno != 0) {
    6677           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6678           0 :                         return;
    6679             :                 }
    6680             :         }
    6681             : 
    6682             :         /* swap cluster maps */
    6683         208 :         bs_snapshot_swap_cluster_maps(newblob, origblob);
    6684             : 
    6685             :         /* Set the clear method on the new blob to match the original. */
    6686         208 :         blob_set_clear_method(newblob, origblob->clear_method);
    6687             : 
    6688             :         /* sync snapshot metadata */
    6689         208 :         spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
    6690             : }
    6691             : 
    6692             : static void
    6693         212 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6694             : {
    6695         212 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6696         212 :         struct spdk_blob *origblob = ctx->original.blob;
    6697         212 :         struct spdk_blob *newblob = _blob;
    6698             : 
    6699         212 :         if (bserrno != 0) {
    6700           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6701           4 :                 return;
    6702             :         }
    6703             : 
    6704         208 :         ctx->new.blob = newblob;
    6705         208 :         assert(spdk_blob_is_thin_provisioned(newblob));
    6706         208 :         assert(spdk_mem_all_zero(newblob->active.clusters,
    6707             :                                  newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6708         208 :         assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6709             :                                  newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6710             : 
    6711         208 :         blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
    6712             : }
    6713             : 
    6714             : static void
    6715         216 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6716             : {
    6717         216 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6718         216 :         struct spdk_blob *origblob = ctx->original.blob;
    6719             : 
    6720         216 :         if (bserrno != 0) {
    6721           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6722           4 :                 return;
    6723             :         }
    6724             : 
    6725         212 :         ctx->new.id = blobid;
    6726         212 :         ctx->cpl.u.blobid.blobid = blobid;
    6727             : 
    6728         212 :         spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
    6729             : }
    6730             : 
    6731             : 
    6732             : static void
    6733         216 : bs_xattr_snapshot(void *arg, const char *name,
    6734             :                   const void **value, size_t *value_len)
    6735             : {
    6736         216 :         assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
    6737             : 
    6738         216 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6739         216 :         *value = &blob->id;
    6740         216 :         *value_len = sizeof(blob->id);
    6741         216 : }
    6742             : 
    6743             : static void
    6744         226 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6745             : {
    6746         226 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6747         226 :         struct spdk_blob_opts opts;
    6748         226 :         struct spdk_blob_xattr_opts internal_xattrs;
    6749         226 :         char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
    6750             : 
    6751         226 :         if (bserrno != 0) {
    6752           6 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6753           6 :                 return;
    6754             :         }
    6755             : 
    6756         220 :         ctx->original.blob = _blob;
    6757             : 
    6758         220 :         if (_blob->data_ro || _blob->md_ro) {
    6759           4 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
    6760             :                               PRIx64 "\n", _blob->id);
    6761           4 :                 ctx->bserrno = -EINVAL;
    6762           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6763           4 :                 return;
    6764             :         }
    6765             : 
    6766         216 :         if (_blob->locked_operation_in_progress) {
    6767           0 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
    6768           0 :                 ctx->bserrno = -EBUSY;
    6769           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6770           0 :                 return;
    6771             :         }
    6772             : 
    6773         216 :         _blob->locked_operation_in_progress = true;
    6774             : 
    6775         216 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6776         216 :         blob_xattrs_init(&internal_xattrs);
    6777             : 
    6778             :         /* Change the size of new blob to the same as in original blob,
    6779             :          * but do not allocate clusters */
    6780         216 :         opts.thin_provision = true;
    6781         216 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6782         216 :         opts.use_extent_table = _blob->use_extent_table;
    6783             : 
    6784             :         /* If there are any xattrs specified for snapshot, set them now */
    6785         216 :         if (ctx->xattrs) {
    6786           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6787             :         }
    6788             :         /* Set internal xattr SNAPSHOT_IN_PROGRESS */
    6789         216 :         internal_xattrs.count = 1;
    6790         216 :         internal_xattrs.ctx = _blob;
    6791         216 :         internal_xattrs.names = xattrs_names;
    6792         216 :         internal_xattrs.get_value = bs_xattr_snapshot;
    6793             : 
    6794         216 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6795             :                        bs_snapshot_newblob_create_cpl, ctx);
    6796             : }
    6797             : 
    6798             : void
    6799         226 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6800             :                         const struct spdk_blob_xattr_opts *snapshot_xattrs,
    6801             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6802             : {
    6803         226 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    6804             : 
    6805         226 :         if (!ctx) {
    6806           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6807           0 :                 return;
    6808             :         }
    6809         226 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6810         226 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6811         226 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6812         226 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6813         226 :         ctx->bserrno = 0;
    6814         226 :         ctx->frozen = false;
    6815         226 :         ctx->original.id = blobid;
    6816         226 :         ctx->xattrs = snapshot_xattrs;
    6817             : 
    6818         226 :         spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
    6819             : }
    6820             : /* END spdk_bs_create_snapshot */
    6821             : 
    6822             : /* START spdk_bs_create_clone */
    6823             : 
    6824             : static void
    6825          48 : bs_xattr_clone(void *arg, const char *name,
    6826             :                const void **value, size_t *value_len)
    6827             : {
    6828          48 :         assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
    6829             : 
    6830          48 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6831          48 :         *value = &blob->id;
    6832          48 :         *value_len = sizeof(blob->id);
    6833          48 : }
    6834             : 
    6835             : static void
    6836          48 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6837             : {
    6838          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6839          48 :         struct spdk_blob *clone = _blob;
    6840             : 
    6841          48 :         ctx->new.blob = clone;
    6842          48 :         bs_blob_list_add(clone);
    6843             : 
    6844          48 :         spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
    6845          48 : }
    6846             : 
    6847             : static void
    6848          48 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6849             : {
    6850          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6851             : 
    6852          48 :         ctx->cpl.u.blobid.blobid = blobid;
    6853          48 :         spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
    6854          48 : }
    6855             : 
    6856             : static void
    6857          52 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6858             : {
    6859          52 :         struct spdk_clone_snapshot_ctx  *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6860          52 :         struct spdk_blob_opts           opts;
    6861          52 :         struct spdk_blob_xattr_opts internal_xattrs;
    6862          52 :         char *xattr_names[] = { BLOB_SNAPSHOT };
    6863             : 
    6864          52 :         if (bserrno != 0) {
    6865           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6866           0 :                 return;
    6867             :         }
    6868             : 
    6869          52 :         ctx->original.blob = _blob;
    6870          52 :         ctx->original.md_ro = _blob->md_ro;
    6871             : 
    6872          52 :         if (!_blob->data_ro || !_blob->md_ro) {
    6873           4 :                 SPDK_DEBUGLOG(blob, "Clone not from read-only blob\n");
    6874           4 :                 ctx->bserrno = -EINVAL;
    6875           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6876           4 :                 return;
    6877             :         }
    6878             : 
    6879          48 :         if (_blob->locked_operation_in_progress) {
    6880           0 :                 SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
    6881           0 :                 ctx->bserrno = -EBUSY;
    6882           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6883           0 :                 return;
    6884             :         }
    6885             : 
    6886          48 :         _blob->locked_operation_in_progress = true;
    6887             : 
    6888          48 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6889          48 :         blob_xattrs_init(&internal_xattrs);
    6890             : 
    6891          48 :         opts.thin_provision = true;
    6892          48 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6893          48 :         opts.use_extent_table = _blob->use_extent_table;
    6894          48 :         if (ctx->xattrs) {
    6895           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6896             :         }
    6897             : 
    6898             :         /* Set internal xattr BLOB_SNAPSHOT */
    6899          48 :         internal_xattrs.count = 1;
    6900          48 :         internal_xattrs.ctx = _blob;
    6901          48 :         internal_xattrs.names = xattr_names;
    6902          48 :         internal_xattrs.get_value = bs_xattr_clone;
    6903             : 
    6904          48 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6905             :                        bs_clone_newblob_create_cpl, ctx);
    6906             : }
    6907             : 
    6908             : void
    6909          52 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6910             :                      const struct spdk_blob_xattr_opts *clone_xattrs,
    6911             :                      spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6912             : {
    6913          52 :         struct spdk_clone_snapshot_ctx  *ctx = calloc(1, sizeof(*ctx));
    6914             : 
    6915          52 :         if (!ctx) {
    6916           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6917           0 :                 return;
    6918             :         }
    6919             : 
    6920          52 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6921          52 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6922          52 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6923          52 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6924          52 :         ctx->bserrno = 0;
    6925          52 :         ctx->xattrs = clone_xattrs;
    6926          52 :         ctx->original.id = blobid;
    6927             : 
    6928          52 :         spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
    6929             : }
    6930             : 
    6931             : /* END spdk_bs_create_clone */
    6932             : 
    6933             : /* START spdk_bs_inflate_blob */
    6934             : 
    6935             : static void
    6936          12 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
    6937             : {
    6938          12 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6939          12 :         struct spdk_blob *_blob = ctx->original.blob;
    6940             : 
    6941          12 :         if (bserrno != 0) {
    6942           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6943           0 :                 return;
    6944             :         }
    6945             : 
    6946             :         /* Temporarily override md_ro flag for MD modification */
    6947          12 :         _blob->md_ro = false;
    6948             : 
    6949          12 :         bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
    6950          12 :         if (bserrno != 0) {
    6951           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6952           0 :                 return;
    6953             :         }
    6954             : 
    6955          12 :         assert(_parent != NULL);
    6956             : 
    6957          12 :         bs_blob_list_remove(_blob);
    6958          12 :         _blob->parent_id = _parent->id;
    6959             : 
    6960          12 :         blob_back_bs_destroy(_blob);
    6961          12 :         _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
    6962          12 :         bs_blob_list_add(_blob);
    6963             : 
    6964          12 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    6965             : }
    6966             : /*
                     :  * Last step of inflate/decouple, called from bs_inflate_blob_touch_next() once
                     :  * no cluster is left to allocate.
                     :  *
                     :  * With ctx->allocate_all set, the blob is taken off the blob list, the xattr
                     :  * naming its parent is removed, SPDK_BLOB_THIN_PROV is cleared and the
                     :  * back_bs_dev is destroyed. Otherwise only the direct parent is dropped: if
                     :  * that parent has a parent of its own, it is opened and the sequence continues
                     :  * in bs_inflate_blob_set_parent_cpl(); if not, the blob is backed by
                     :  * bs_create_zeroes_dev().
                     :  *
                     :  * Every path that does not return early syncs the metadata and completes in
                     :  * bs_clone_snapshot_origblob_cleanup().
                     :  */
    6967             : static void
    6968          56 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
    6969             : {
    6970          56 :         struct spdk_blob *_blob = ctx->original.blob;
    6971             :         struct spdk_blob *_parent;
    6972             : 
    6973          56 :         if (ctx->allocate_all) {
    6974             :                 /* remove thin provisioning */
    6975          32 :                 bs_blob_list_remove(_blob);
    6976          32 :                 if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    6977           8 :                         blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6978           8 :                         _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6979             :                 } else {
    6980          24 :                         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    6981             :                 }
    6982          32 :                 _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
    6983          32 :                 blob_back_bs_destroy(_blob);
    6984          32 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    6985             :         } else {
    6986             :                 /* For now, esnap clones always have allocate_all set. */
    6987          24 :                 assert(!blob_is_esnap_clone(_blob));
    6988             : 
    6989          24 :                 _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    6990          24 :                 if (_parent->parent_id != SPDK_BLOBID_INVALID) {
    6991             :                         /* We must change the parent of the inflated blob */
    6992          12 :                         spdk_bs_open_blob(_blob->bs, _parent->parent_id,
    6993             :                                           bs_inflate_blob_set_parent_cpl, ctx);
    6994          12 :                         return;
    6995             :                 }
    6996             : 
    6997          12 :                 bs_blob_list_remove(_blob);
    6998          12 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    6999          12 :                 blob_back_bs_destroy(_blob);
    7000          12 :                 _blob->back_bs_dev = bs_create_zeroes_dev();
    7001             :         }
    7002             :         /* NOTE(review): BLOB_SNAPSHOT was already removed above on the non-esnap
                     :          * allocate_all path, so the blob_remove_xattr() below looks redundant
                     :          * there - confirm it tolerates a missing xattr.
                     :          */
    7003             :         /* Temporarily override md_ro flag for MD modification */
    7004          44 :         _blob->md_ro = false;
    7005          44 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7006          44 :         _blob->state = SPDK_BLOB_STATE_DIRTY;
    7007             : 
    7008          44 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7009             : }
    7010             : 
    7011             : /* Check if cluster needs allocation.
                     :  *
                     :  * Returns false if the blob already has the cluster allocated. For a blob
                     :  * without a parent the answer is allocate_all; for a blob whose parent_id is
                     :  * SPDK_BLOBID_EXTERNAL_SNAPSHOT it is always true; otherwise it is true when
                     :  * allocate_all is set or the parent blob (reached through back_bs_dev) has
                     :  * that cluster allocated.
                     :  */
    7012             : static inline bool
    7013        1200 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
    7014             : {
    7015             :         struct spdk_blob_bs_dev *b;
    7016             : 
    7017        1200 :         assert(blob != NULL);
    7018             : 
    7019        1200 :         if (blob->active.clusters[cluster] != 0) {
    7020             :                 /* Cluster is already allocated */
    7021          32 :                 return false;
    7022             :         }
    7023             : 
    7024        1168 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    7025             :                 /* Blob has no parent blob */
    7026          80 :                 return allocate_all;
    7027             :         }
    7028             : 
    7029        1088 :         if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7030          64 :                 return true;
    7031             :         }
    7032             : 
    7033        1024 :         b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
    7034        1024 :         return (allocate_all || b->blob->active.clusters[cluster] != 0);
    7035             : }
    7036             : /*
                     :  * Allocate the clusters of the blob one at a time. The function is its own
                     :  * completion callback: each call skips forward to the next cluster for which
                     :  * bs_cluster_needs_allocation() returns true and hands it to
                     :  * bs_allocate_and_copy_cluster(). When ctx->cluster reaches num_clusters,
                     :  * bs_inflate_blob_done() is called.
                     :  *
                     :  * A non-zero bserrno, or failure to allocate the user op (-ENOMEM), ends the
                     :  * sequence in bs_clone_snapshot_origblob_cleanup().
                     :  */
    7037             : static void
    7038         508 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
    7039             : {
    7040         508 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7041         508 :         struct spdk_blob *_blob = ctx->original.blob;
    7042         508 :         struct spdk_bs_cpl cpl;
    7043             :         spdk_bs_user_op_t *op;
    7044             :         uint64_t offset;
    7045             : 
    7046         508 :         if (bserrno != 0) {
    7047           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    7048           0 :                 return;
    7049             :         }
    7050             :         /* Skip clusters that do not need allocation */
    7051         656 :         for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
    7052         600 :                 if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
    7053         452 :                         break;
    7054             :                 }
    7055             :         }
    7056             : 
    7057         508 :         if (ctx->cluster < _blob->active.num_clusters) {
    7058         452 :                 offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
    7059             : 
    7060             :                 /* We may safely increment a cluster before copying */
    7061         452 :                 ctx->cluster++;
    7062             : 
    7063             :                 /* Use a dummy 0B read as a context for cluster copy */
    7064         452 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7065         452 :                 cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
    7066         452 :                 cpl.u.blob_basic.cb_arg = ctx;
    7067             : 
    7068         452 :                 op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
    7069             :                                       NULL, 0, offset, 0);
    7070         452 :                 if (!op) {
    7071           0 :                         bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
    7072           0 :                         return;
    7073             :                 }
    7074             : 
    7075         452 :                 bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
    7076             :         } else {
    7077          56 :                 bs_inflate_blob_done(ctx);
    7078             :         }
    7079             : }
    7080             : /*
                     :  * spdk_bs_open_blob() completion for bs_inflate_blob().
                     :  *
                     :  * Stores the blob and its current md_ro in ctx, then:
                     :  *  - fails with -EBUSY if another locked operation is in progress,
                     :  *  - fails with -EINVAL when asked to decouple a blob that has no parent,
                     :  *  - forces allocate_all for a blob whose parent is an external snapshot,
                     :  *  - finishes with 0 when the blob is not thin provisioned,
                     :  *  - fails with -ENOSPC when more clusters are needed than are free.
                     :  * Otherwise the cluster walk starts in bs_inflate_blob_touch_next().
                     :  */
    7081             : static void
    7082          60 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7083             : {
    7084          60 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7085             :         uint64_t clusters_needed;
    7086             :         uint64_t i;
    7087             : 
    7088          60 :         if (bserrno != 0) {
    7089           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    7090           0 :                 return;
    7091             :         }
    7092             : 
    7093          60 :         ctx->original.blob = _blob;
    7094          60 :         ctx->original.md_ro = _blob->md_ro;
    7095             : 
    7096          60 :         if (_blob->locked_operation_in_progress) {
    7097           0 :                 SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
    7098           0 :                 ctx->bserrno = -EBUSY;
    7099           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    7100           0 :                 return;
    7101             :         }
    7102             : 
    7103          60 :         _blob->locked_operation_in_progress = true;
    7104             : 
    7105          60 :         switch (_blob->parent_id) {
    7106           8 :         case SPDK_BLOBID_INVALID:
    7107           8 :                 if (!ctx->allocate_all) {
    7108             :                         /* This blob has no parent, so we cannot decouple it. */
    7109           4 :                         SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
    7110           4 :                         bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
    7111           4 :                         return;
    7112             :                 }
    7113           4 :                 break;
    7114           8 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    7115             :                 /*
    7116             :                  * It would be better to rely on back_bs_dev->is_zeroes(), to determine which
    7117             :                  * clusters require allocation. Until there is a blobstore consumer that
    7118             :                  * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes() it is not
    7119             :                  * worth the effort.
    7120             :                  */
    7121           8 :                 ctx->allocate_all = true;
    7122           8 :                 break;
    7123          44 :         default:
    7124          44 :                 break;
    7125             :         }
    7126             : 
    7127          56 :         if (spdk_blob_is_thin_provisioned(_blob) == false) {
    7128             :                 /* This is not a thin provisioned blob. No need to inflate. */
    7129           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, 0);
    7130           0 :                 return;
    7131             :         }
    7132             : 
    7133             :         /* Do two passes - one to verify that we can obtain enough clusters
    7134             :          * and another to actually claim them.
    7135             :          */
    7136          56 :         clusters_needed = 0;
    7137         656 :         for (i = 0; i < _blob->active.num_clusters; i++) {
    7138         600 :                 if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
    7139         452 :                         clusters_needed++;
    7140             :                 }
    7141             :         }
    7142             : 
    7143          56 :         if (clusters_needed > _blob->bs->num_free_clusters) {
    7144             :                 /* Not enough free clusters. Cannot satisfy the request. */
    7145           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
    7146           0 :                 return;
    7147             :         }
    7148             : 
    7149          56 :         ctx->cluster = 0;
    7150          56 :         bs_inflate_blob_touch_next(ctx, 0);
    7151             : }
    7152             : /*
                     :  * Common entry point of spdk_bs_inflate_blob() (allocate_all == true) and
                     :  * spdk_bs_blob_decouple_parent() (allocate_all == false).
                     :  *
                     :  * Allocates the context, records the user callback, blob id, channel and
                     :  * allocate_all, then opens the blob; the work continues in
                     :  * bs_inflate_blob_open_cpl(). If the context cannot be allocated, cb_fn is
                     :  * called right away with -ENOMEM.
                     :  *
                     :  * NOTE(review): cpl.type is SPDK_BS_CPL_TYPE_BLOB_BASIC while the callback is
                     :  * stored through u.bs_basic - presumably both union members share the same
                     :  * layout; confirm against struct spdk_bs_cpl.
                     :  */
    7153             : static void
    7154          60 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7155             :                 spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
    7156             : {
    7157          60 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    7158             : 
    7159          60 :         if (!ctx) {
    7160           0 :                 cb_fn(cb_arg, -ENOMEM);
    7161           0 :                 return;
    7162             :         }
    7163          60 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7164          60 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7165          60 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7166          60 :         ctx->bserrno = 0;
    7167          60 :         ctx->original.id = blobid;
    7168          60 :         ctx->channel = channel;
    7169          60 :         ctx->allocate_all = allocate_all;
    7170             : 
    7171          60 :         spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
    7172             : }
    7173             : /* Inflate a blob: runs bs_inflate_blob() with allocate_all set to true. */
    7174             : void
    7175          28 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7176             :                      spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7177             : {
    7178          28 :         bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
    7179          28 : }
    7180             : /* Decouple a blob from its parent: runs bs_inflate_blob() with allocate_all set to false. */
    7181             : void
    7182          32 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7183             :                              spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7184             : {
    7185          32 :         bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
    7186          32 : }
    7187             : /* END spdk_bs_inflate_blob */
    7188             : 
    7189             : /* START spdk_bs_blob_shallow_copy */
    7190             : /*
                     :  * State carried through the asynchronous shallow copy sequence. Allocated in
                     :  * spdk_bs_blob_shallow_copy() and freed in bs_shallow_copy_cleanup_finish().
                     :  */
    7191             : struct shallow_copy_ctx {
    7192             :         struct spdk_bs_cpl cpl; /* Holds the user's completion callback and argument */
    7193             :         int bserrno; /* Status passed to the user's completion callback */
    7194             : 
    7195             :         /* Blob source for copy */
    7196             :         struct spdk_blob_store *bs;
    7197             :         spdk_blob_id blobid;
    7198             :         struct spdk_blob *blob;
    7199             :         struct spdk_io_channel *blob_channel;
    7200             : 
    7201             :         /* Destination device for copy */
    7202             :         struct spdk_bs_dev *ext_dev;
    7203             :         struct spdk_io_channel *ext_channel;
    7204             : 
    7205             :         /* Current cluster for copy operation */
    7206             :         uint64_t cluster;
    7207             : 
    7208             :         /* Buffer for blob reading; sized to hold one cluster */
    7209             :         uint8_t *read_buff;
    7210             : 
    7211             :         /* Struct for external device writing */
    7212             :         struct spdk_bs_dev_cb_args ext_args;
    7213             : 
    7214             :         /* Actual number of copied clusters; only updated when status_cb is set */
    7215             :         uint64_t copied_clusters_count;
    7216             : 
    7217             :         /* Status callback for updates about the ongoing operation */
    7218             :         spdk_blob_shallow_copy_status status_cb;
    7219             : 
    7220             :         /* Argument passed to function status_cb */
    7221             :         void *status_cb_arg;
    7222             : };
    7223             : /*
                     :  * Final step of a shallow copy; used directly and as spdk_blob_close()
                     :  * completion. Destroys the external device channel, frees the read buffer,
                     :  * reports ctx->bserrno to the user callback and frees the context.
                     :  *
                     :  * A non-zero bserrno replaces whatever ctx->bserrno held before.
                     :  *
                     :  * NOTE(review): ctx->blob is assigned in bs_shallow_copy_blob_open_cpl() only
                     :  * after its validation checks, while the earlier checks already close the
                     :  * blob with this function as completion. If such a close fails, the error log
                     :  * below looks like it dereferences a NULL ctx->blob - confirm.
                     :  */
    7224             : static void
    7225          16 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
    7226             : {
    7227          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7228          16 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    7229             : 
    7230          16 :         if (bserrno != 0) {
    7231           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blob->id, bserrno);
    7232           0 :                 ctx->bserrno = bserrno;
    7233             :         }
    7234             : 
    7235          16 :         ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
    7236          16 :         spdk_free(ctx->read_buff);
    7237             : 
    7238          16 :         cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    7239             : 
    7240          16 :         free(ctx);
    7241          16 : }
    7242             : /*
                     :  * Completion of the external device write of one cluster. On error the status
                     :  * is recorded, the blob lock flag is released and the blob is closed.
                     :  * Otherwise the cluster index advances, progress is reported through
                     :  * status_cb when one was supplied, and the search for the next cluster to
                     :  * copy resumes.
                     :  */
    7243             : static void
    7244           8 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
    7245             : {
    7246           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7247           8 :         struct spdk_blob *_blob = ctx->blob;
    7248             : 
    7249           8 :         if (bserrno != 0) {
    7250           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
    7251           0 :                 ctx->bserrno = bserrno;
    7252           0 :                 _blob->locked_operation_in_progress = false;
    7253           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7254           0 :                 return;
    7255             :         }
    7256             : 
    7257           8 :         ctx->cluster++;
    7258           8 :         if (ctx->status_cb) {
    7259           8 :                 ctx->copied_clusters_count++;
    7260           8 :                 ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
    7261             :         }
    7262             : 
    7263           8 :         bs_shallow_copy_cluster_find_next(ctx);
    7264             : }
    7265             : /*
                     :  * Completion of the blob read of one cluster. On error the status is
                     :  * recorded, the blob lock flag is released and the blob is closed. Otherwise
                     :  * the buffer just read is written to the external device;
                     :  * bs_shallow_copy_bdev_write_cpl() runs when that write completes.
                     :  *
                     :  * NOTE(review): the write offset and length are derived from the blobstore
                     :  * and its device, not from ext_dev, while the open-time check only requires
                     :  * the blobstore block size to be a multiple of ext_dev->blocklen - confirm
                     :  * the units are what ext_dev->write() expects.
                     :  */
    7266             : static void
    7267           8 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
    7268             : {
    7269           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7270           8 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7271           8 :         struct spdk_blob *_blob = ctx->blob;
    7272             : 
    7273           8 :         if (bserrno != 0) {
    7274           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
    7275           0 :                 ctx->bserrno = bserrno;
    7276           0 :                 _blob->locked_operation_in_progress = false;
    7277           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7278           0 :                 return;
    7279             :         }
    7280             : 
    7281           8 :         ctx->ext_args.channel = ctx->ext_channel;
    7282           8 :         ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
    7283           8 :         ctx->ext_args.cb_arg = ctx;
    7284             : 
    7285           8 :         ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
    7286           8 :                        bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7287           8 :                        bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7288             :                        &ctx->ext_args);
    7289             : }
    7290             : /*
                     :  * Advance ctx->cluster to the next cluster with a non-zero entry in
                     :  * active.clusters and submit a read of that whole cluster into
                     :  * ctx->read_buff; bs_shallow_copy_blob_read_cpl() runs on completion.
                     :  * When no such cluster is left, the blob lock flag is released and the blob
                     :  * is closed, finishing in bs_shallow_copy_cleanup_finish().
                     :  */
    7291             : static void
    7292          12 : bs_shallow_copy_cluster_find_next(void *cb_arg)
    7293             : {
    7294          12 :         struct shallow_copy_ctx *ctx = cb_arg;
    7295          12 :         struct spdk_blob *_blob = ctx->blob;
    7296             : 
    7297          20 :         while (ctx->cluster < _blob->active.num_clusters) {
    7298          16 :                 if (_blob->active.clusters[ctx->cluster] != 0) {
    7299           8 :                         break;
    7300             :                 }
    7301             : 
    7302           8 :                 ctx->cluster++;
    7303             :         }
    7304             : 
    7305          12 :         if (ctx->cluster < _blob->active.num_clusters) {
    7306           8 :                 blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
    7307           8 :                                               bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7308           8 :                                               bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7309             :                                               bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
    7310             :         } else {
    7311           4 :                 _blob->locked_operation_in_progress = false;
    7312           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7313             :         }
    7314          12 : }
    7315             : /*
                     :  * spdk_bs_open_blob() completion for spdk_bs_blob_shallow_copy().
                     :  *
                     :  * Checks, in order, and closes the blob with the given status on failure:
                     :  *  - the blob is read only (-EPERM),
                     :  *  - the external device is at least as large as the blob (-EINVAL),
                     :  *  - the blobstore block size is a multiple of the external device block
                     :  *    size (-EINVAL),
                     :  *  - no other locked operation is in progress on the blob (-EBUSY).
                     :  * On success the blob lock flag is taken and the copy starts at cluster 0.
                     :  * An open error finishes the operation without closing anything.
                     :  */
    7316             : static void
    7317          16 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7318             : {
    7319          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7320          16 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7321             :         uint32_t blob_block_size;
    7322             :         uint64_t blob_total_size;
    7323             : 
    7324          16 :         if (bserrno != 0) {
    7325           0 :                 SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
    7326           0 :                 ctx->bserrno = bserrno;
    7327           0 :                 bs_shallow_copy_cleanup_finish(ctx, 0);
    7328           0 :                 return;
    7329             :         }
    7330             : 
    7331          16 :         if (!spdk_blob_is_read_only(_blob)) {
    7332           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
    7333           4 :                 ctx->bserrno = -EPERM;
    7334           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7335           4 :                 return;
    7336             :         }
    7337             : 
    7338          12 :         blob_block_size = _blob->bs->dev->blocklen;
    7339          12 :         blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
    7340             : 
    7341          12 :         if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
    7342           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must have at least blob size\n",
    7343             :                             _blob->id);
    7344           4 :                 ctx->bserrno = -EINVAL;
    7345           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7346           4 :                 return;
    7347             :         }
    7348             : 
    7349           8 :         if (blob_block_size % ext_dev->blocklen != 0) {
    7350           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible with \
    7351             : blobstore block size\n", _blob->id);
    7352           4 :                 ctx->bserrno = -EINVAL;
    7353           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7354           4 :                 return;
    7355             :         }
    7356             : 
    7357           4 :         ctx->blob = _blob;
    7358             : 
    7359           4 :         if (_blob->locked_operation_in_progress) {
    7360           0 :                 SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
    7361           0 :                 ctx->bserrno = -EBUSY;
    7362           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7363           0 :                 return;
    7364             :         }
    7365             : 
    7366           4 :         _blob->locked_operation_in_progress = true;
    7367             : 
    7368           4 :         ctx->cluster = 0;
    7369           4 :         bs_shallow_copy_cluster_find_next(ctx);
    7370             : }
    7371             : /*
                     :  * Start a shallow copy of blob blobid to ext_dev: only clusters that are
                     :  * allocated in the blob itself are copied (see
                     :  * bs_shallow_copy_cluster_find_next()).
                     :  *
                     :  * Allocates the context, a DMA buffer of one cluster and a channel on
                     :  * ext_dev, then opens the blob. Returns -ENOMEM if any of these three
                     :  * resources cannot be obtained, in which case cb_fn is not called.
                     :  * Returns 0 once the open has been issued; the final status is then
                     :  * delivered through cb_fn. status_cb_fn may be NULL.
                     :  *
                     :  * NOTE(review): the callback is stored through cpl.u.bs_basic but read back
                     :  * through cpl.u.blob_basic in bs_shallow_copy_cleanup_finish() - presumably
                     :  * both union members share the same layout; confirm.
                     :  */
    7372             : int
    7373          16 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7374             :                           spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
    7375             :                           spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
    7376             :                           spdk_blob_op_complete cb_fn, void *cb_arg)
    7377             : {
    7378             :         struct shallow_copy_ctx *ctx;
    7379             :         struct spdk_io_channel *ext_channel;
    7380             : 
    7381          16 :         ctx = calloc(1, sizeof(*ctx));
    7382          16 :         if (!ctx) {
    7383           0 :                 return -ENOMEM;
    7384             :         }
    7385             : 
    7386          16 :         ctx->bs = bs;
    7387          16 :         ctx->blobid = blobid;
    7388          16 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7389          16 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7390          16 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7391          16 :         ctx->bserrno = 0;
    7392          16 :         ctx->blob_channel = channel;
    7393          16 :         ctx->status_cb = status_cb_fn;
    7394          16 :         ctx->status_cb_arg = status_cb_arg;
    7395          16 :         ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
    7396             :                                      SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    7397          16 :         if (!ctx->read_buff) {
    7398           0 :                 free(ctx);
    7399           0 :                 return -ENOMEM;
    7400             :         }
    7401             : 
    7402          16 :         ext_channel = ext_dev->create_channel(ext_dev);
    7403          16 :         if (!ext_channel) {
    7404           0 :                 spdk_free(ctx->read_buff);
    7405           0 :                 free(ctx);
    7406           0 :                 return -ENOMEM;
    7407             :         }
    7408          16 :         ctx->ext_dev = ext_dev;
    7409          16 :         ctx->ext_channel = ext_channel;
    7410             : 
    7411          16 :         spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
    7412             : 
    7413          16 :         return 0;
    7414             : }
    7415             : /* END spdk_bs_blob_shallow_copy */
    7416             : 
    7417             : /* START spdk_bs_blob_set_parent */
    7418             : /*
                     :  * State carried through the asynchronous set parent sequence; freed in
                     :  * bs_set_parent_cleanup_finish().
                     :  */
    7419             : struct set_parent_ctx {
    7420             :         struct spdk_blob_store *bs;
    7421             :         int                     bserrno; /* Status passed to cb_fn */
    7422             :         spdk_bs_op_complete     cb_fn;
    7423             :         void                    *cb_arg;
    7424             : 
    7425             :         struct spdk_blob        *blob; /* Blob whose parent is being changed */
    7426             :         bool                    blob_md_ro; /* md_ro value restored in bs_set_parent_close_blob() */
    7427             : 
    7428             :         struct blob_parent      parent; /* The snapshot opened as the new parent */
    7429             : };
    7430             : /*
                     :  * Final step of set parent: reports ctx->bserrno to the user callback and
                     :  * frees the context. A non-zero bserrno is logged, but recorded only when no
                     :  * earlier error is already stored in ctx->bserrno.
                     :  */
    7431             : static void
    7432          24 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
    7433             : {
    7434          24 :         struct set_parent_ctx *ctx = cb_arg;
    7435             : 
    7436          24 :         assert(ctx != NULL);
    7437             : 
    7438          24 :         if (bserrno != 0) {
    7439           0 :                 SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
    7440           0 :                 if (ctx->bserrno == 0) {
    7441           0 :                         ctx->bserrno = bserrno;
    7442             :                 }
    7443             :         }
    7444             : 
    7445          24 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7446             : 
    7447          24 :         free(ctx);
    7448          24 : }
    7449             : /*
                     :  * Completion of closing the blob. If an error is already stored in
                     :  * ctx->bserrno, the snapshot is closed as well (the bserrno argument is not
                     :  * looked at on that path) and the sequence ends in
                     :  * bs_set_parent_cleanup_finish(). Otherwise a close error is recorded and
                     :  * the sequence is finished directly.
                     :  *
                     :  * NOTE(review): on the success path the snapshot opened for this operation
                     :  * is not closed here - presumably the reference is kept by the back_bs_dev
                     :  * created from it in bs_set_parent_snapshot_open_cpl(); confirm.
                     :  */
    7450             : static void
    7451          20 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
    7452             : {
    7453          20 :         struct set_parent_ctx *ctx = cb_arg;
    7454             : 
    7455          20 :         if (ctx->bserrno != 0) {
    7456           8 :                 spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
    7457           8 :                 return;
    7458             :         }
    7459             : 
    7460          12 :         if (bserrno != 0) {
    7461           0 :                 SPDK_ERRLOG("blob close error %d\n", bserrno);
    7462           0 :                 ctx->bserrno = bserrno;
    7463             :         }
    7464             : 
    7465          12 :         bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
    7466             : }
    7467             : /*
                     :  * Completion of the metadata sync; also called directly by
                     :  * bs_set_parent_set_back_bs_dev_done() on failure. Records bserrno when no
                     :  * error is stored yet, restores the blob's md_ro, releases the lock flag on
                     :  * both blob and snapshot, then closes the blob and continues in
                     :  * bs_set_parent_close_snapshot().
                     :  */
    7468             : static void
    7469          12 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
    7470             : {
    7471          12 :         struct set_parent_ctx *ctx = cb_arg;
    7472          12 :         struct spdk_blob *blob = ctx->blob;
    7473          12 :         struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
    7474             : 
    7475          12 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7476           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7477           0 :                 ctx->bserrno = bserrno;
    7478             :         }
    7479             : 
    7480             :         /* Revert md_ro to original state */
    7481          12 :         blob->md_ro = ctx->blob_md_ro;
    7482             : 
    7483          12 :         blob->locked_operation_in_progress = false;
    7484          12 :         snapshot->locked_operation_in_progress = false;
    7485             : 
    7486          12 :         spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7487          12 : }
    7488             : /*
                     :  * Completion of blob_set_back_bs_dev(). On error the status is recorded and
                     :  * the close sequence starts right away; otherwise the blob metadata is
                     :  * synced first and bs_set_parent_close_blob() runs when that completes.
                     :  */
    7489             : static void
    7490          12 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
    7491             : {
    7492          12 :         struct set_parent_ctx *ctx = cb_arg;
    7493          12 :         struct spdk_blob *blob = ctx->blob;
    7494             : 
    7495          12 :         if (bserrno != 0) {
    7496           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7497           0 :                 ctx->bserrno = bserrno;
    7498           0 :                 bs_set_parent_close_blob(ctx, bserrno);
    7499           0 :                 return;
    7500             :         }
    7501             : 
    7502          12 :         spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
    7503             : }
    7504             : /*
                     :  * Handed to blob_set_back_bs_dev() by bs_set_parent_snapshot_open_cpl() to
                     :  * update the blob's parent references: the blob is removed from the blob
                     :  * list, its BLOB_SNAPSHOT xattr and parent_id are set to the new snapshot id,
                     :  * the external snapshot flag and xattr are dropped if blob_is_esnap_clone()
                     :  * reports true, and the blob is added to the blob list again.
                     :  *
                     :  * Returns 0 on success or the error returned by blob_set_xattr().
                     :  *
                     :  * NOTE(review): when blob_set_xattr() fails the function returns after
                     :  * bs_blob_list_remove() without re-adding the blob - confirm this is
                     :  * intended.
                     :  */
    7505             : static int
    7506          12 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7507             : {
    7508             :         int rc;
    7509             : 
    7510          12 :         bs_blob_list_remove(blob);
    7511             : 
    7512          12 :         rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
    7513          12 :         if (rc != 0) {
    7514           0 :                 SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
    7515           0 :                 return rc;
    7516             :         }
    7517          12 :         blob->parent_id = parent->u.snapshot.id;
    7518             : 
    7519          12 :         if (blob_is_esnap_clone(blob)) {
    7520             :                 /* Remove the xattr that references the external snapshot */
    7521           4 :                 blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7522           4 :                 blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7523             :         }
    7524             : 
    7525          12 :         bs_blob_list_add(blob);
    7526             : 
    7527          12 :         return 0;
    7528             : }
    7529             : /*
                     :  * Open completion for the blob that is to become the new parent.
                     :  *
                     :  * If the open failed, only the child blob is closed. Otherwise the opened
                     :  * blob is stored in ctx->parent and checked; on failure both blobs are closed
                     :  * through bs_set_parent_close_snapshot():
                     :  *  - it must be a snapshot (-EINVAL),
                     :  *  - it must have the same number of clusters as the child (-EINVAL),
                     :  *  - neither blob may have a locked operation in progress (-EBUSY).
                     :  * On success both blobs take the lock flag, the child's md_ro is cleared and
                     :  * a back_bs_dev created from the snapshot is installed with
                     :  * blob_set_back_bs_dev(), which is given bs_set_parent_refs() to update the
                     :  * parent references.
                     :  */
    7530             : static void
    7531          20 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    7532             : {
    7533          20 :         struct set_parent_ctx *ctx = cb_arg;
    7534          20 :         struct spdk_blob *blob = ctx->blob;
    7535             :         struct spdk_bs_dev *back_bs_dev;
    7536             : 
    7537          20 :         if (bserrno != 0) {
    7538           0 :                 SPDK_ERRLOG("snapshot open error %d\n", bserrno);
    7539           0 :                 ctx->bserrno = bserrno;
    7540           0 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7541           0 :                 return;
    7542             :         }
    7543             : 
    7544          20 :         ctx->parent.u.snapshot.blob = snapshot;
    7545          20 :         ctx->parent.u.snapshot.id = snapshot->id;
    7546             : 
    7547          20 :         if (!spdk_blob_is_snapshot(snapshot)) {
    7548           4 :                 SPDK_ERRLOG("parent blob is not a snapshot\n");
    7549           4 :                 ctx->bserrno = -EINVAL;
    7550           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7551           4 :                 return;
    7552             :         }
    7553             : 
    7554          16 :         if (blob->active.num_clusters != snapshot->active.num_clusters) {
    7555           4 :                 SPDK_ERRLOG("parent blob has a number of clusters different from child's ones\n");
    7556           4 :                 ctx->bserrno = -EINVAL;
    7557           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7558           4 :                 return;
    7559             :         }
    7560             : 
    7561          12 :         if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
    7562           0 :                 SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
    7563           0 :                 ctx->bserrno = -EBUSY;
    7564           0 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7565           0 :                 return;
    7566             :         }
    7567             : 
    7568          12 :         blob->locked_operation_in_progress = true;
    7569          12 :         snapshot->locked_operation_in_progress = true;
    7570             : 
    7571             :         /* Temporarily override md_ro flag for MD modification */
    7572          12 :         blob->md_ro = false;
    7573             : 
    7574          12 :         back_bs_dev = bs_create_blob_bs_dev(snapshot);
    7575             : 
    7576          12 :         blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
    7577             :                              bs_set_parent_set_back_bs_dev_done,
    7578             :                              ctx);
    7579             : }
    7580             : 
    7581             : static void
    7582          24 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7583             : {
    7584          24 :         struct set_parent_ctx *ctx = cb_arg;
    7585             : 
    7586          24 :         if (bserrno != 0) {
    7587           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7588           0 :                 ctx->bserrno = bserrno;
    7589           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7590           0 :                 return;
    7591             :         }
    7592             : 
    7593          24 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7594           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7595           4 :                 ctx->bserrno = -EINVAL;
    7596           4 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7597           4 :                 return;
    7598             :         }
    7599             : 
    7600          20 :         ctx->blob = blob;
    7601          20 :         ctx->blob_md_ro = blob->md_ro;
    7602             : 
    7603          20 :         spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx);
    7604             : }
    7605             : 
    7606             : void
    7607          36 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7608             :                         spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
    7609             : {
    7610             :         struct set_parent_ctx *ctx;
    7611             : 
    7612          36 :         if (snapshot_id == SPDK_BLOBID_INVALID) {
    7613           4 :                 SPDK_ERRLOG("snapshot id not valid\n");
    7614           4 :                 cb_fn(cb_arg, -EINVAL);
    7615           4 :                 return;
    7616             :         }
    7617             : 
    7618          32 :         if (blob_id == snapshot_id) {
    7619           4 :                 SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
    7620           4 :                 cb_fn(cb_arg, -EINVAL);
    7621           4 :                 return;
    7622             :         }
    7623             : 
    7624          28 :         if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
    7625           4 :                 SPDK_NOTICELOG("snapshot is already the parent of blob\n");
    7626           4 :                 cb_fn(cb_arg, -EEXIST);
    7627           4 :                 return;
    7628             :         }
    7629             : 
    7630          24 :         ctx = calloc(1, sizeof(*ctx));
    7631          24 :         if (!ctx) {
    7632           0 :                 cb_fn(cb_arg, -ENOMEM);
    7633           0 :                 return;
    7634             :         }
    7635             : 
    7636          24 :         ctx->bs = bs;
    7637          24 :         ctx->parent.u.snapshot.id = snapshot_id;
    7638          24 :         ctx->cb_fn = cb_fn;
    7639          24 :         ctx->cb_arg = cb_arg;
    7640          24 :         ctx->bserrno = 0;
    7641             : 
    7642          24 :         spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx);
    7643             : }
    7644             : /* END spdk_bs_blob_set_parent */
    7645             : 
    7646             : /* START spdk_bs_blob_set_external_parent */
    7647             : 
    7648             : static void
    7649          16 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
    7650             : {
    7651          16 :         struct set_parent_ctx *ctx = cb_arg;
    7652             : 
    7653          16 :         if (bserrno != 0) {
    7654           0 :                 SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
    7655           0 :                 if (ctx->bserrno == 0) {
    7656           0 :                         ctx->bserrno = bserrno;
    7657             :                 }
    7658             :         }
    7659             : 
    7660          16 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7661             : 
    7662          16 :         free(ctx->parent.u.esnap.id);
    7663          16 :         free(ctx);
    7664          16 : }
    7665             : 
    7666             : static void
    7667           8 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
    7668             : {
    7669           8 :         struct set_parent_ctx *ctx = cb_arg;
    7670           8 :         struct spdk_blob *blob = ctx->blob;
    7671             : 
    7672           8 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7673           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7674           0 :                 ctx->bserrno = bserrno;
    7675             :         }
    7676             : 
    7677             :         /* Revert md_ro to original state */
    7678           8 :         blob->md_ro = ctx->blob_md_ro;
    7679             : 
    7680           8 :         blob->locked_operation_in_progress = false;
    7681             : 
    7682           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7683           8 : }
    7684             : 
    7685             : static void
    7686           8 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
    7687             : {
    7688           8 :         struct set_parent_ctx *ctx = cb_arg;
    7689           8 :         struct spdk_blob *blob = ctx->blob;
    7690             : 
    7691           8 :         if (bserrno != 0) {
    7692           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7693           0 :                 ctx->bserrno = bserrno;
    7694           0 :                 bs_set_external_parent_close_blob(ctx, bserrno);
    7695           0 :                 return;
    7696             :         }
    7697             : 
    7698           8 :         spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
    7699             : }
    7700             : 
    7701             : static int
    7702           8 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7703             : {
    7704             :         int rc;
    7705             : 
    7706           8 :         bs_blob_list_remove(blob);
    7707             : 
    7708           8 :         if (spdk_blob_is_clone(blob)) {
    7709             :                 /* Remove the xattr that references the snapshot */
    7710           0 :                 blob->parent_id = SPDK_BLOBID_INVALID;
    7711           0 :                 blob_remove_xattr(blob, BLOB_SNAPSHOT, true);
    7712             :         }
    7713             : 
    7714           8 :         rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
    7715           8 :                             parent->u.esnap.id_len, true);
    7716           8 :         if (rc != 0) {
    7717           0 :                 SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
    7718           0 :                 return rc;
    7719             :         }
    7720           8 :         blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7721             : 
    7722           8 :         bs_blob_list_add(blob);
    7723             : 
    7724           8 :         return 0;
    7725             : }
    7726             : 
    7727             : static void
    7728          16 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7729             : {
    7730          16 :         struct set_parent_ctx *ctx = cb_arg;
    7731          16 :         const void *esnap_id;
    7732          16 :         size_t esnap_id_len;
    7733             :         int rc;
    7734             : 
    7735          16 :         if (bserrno != 0) {
    7736           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7737           0 :                 ctx->bserrno = bserrno;
    7738           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7739           0 :                 return;
    7740             :         }
    7741             : 
    7742          16 :         ctx->blob = blob;
    7743          16 :         ctx->blob_md_ro = blob->md_ro;
    7744             : 
    7745          16 :         rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
    7746          16 :         if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len &&
    7747           4 :             memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
    7748           4 :                 SPDK_ERRLOG("external snapshot is already the parent of blob\n");
    7749           4 :                 ctx->bserrno = -EEXIST;
    7750           4 :                 goto error;
    7751             :         }
    7752             : 
    7753          12 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7754           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7755           4 :                 ctx->bserrno = -EINVAL;
    7756           4 :                 goto error;
    7757             :         }
    7758             : 
    7759           8 :         if (blob->locked_operation_in_progress) {
    7760           0 :                 SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
    7761           0 :                 ctx->bserrno = -EBUSY;
    7762           0 :                 goto error;
    7763             :         }
    7764             : 
    7765           8 :         blob->locked_operation_in_progress = true;
    7766             : 
    7767             :         /* Temporarily override md_ro flag for MD modification */
    7768           8 :         blob->md_ro = false;
    7769             : 
    7770           8 :         blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
    7771             :                              &ctx->parent, bs_set_external_parent_unfrozen, ctx);
    7772           8 :         return;
    7773             : 
    7774           8 : error:
    7775           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7776             : }
    7777             : 
    7778             : void
    7779          24 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7780             :                                  struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
    7781             :                                  uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
    7782             : {
    7783             :         struct set_parent_ctx *ctx;
    7784             :         uint64_t esnap_dev_size, cluster_sz;
    7785             : 
    7786          24 :         if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) {
    7787           4 :                 SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
    7788           4 :                 cb_fn(cb_arg, -EINVAL);
    7789           4 :                 return;
    7790             :         }
    7791             : 
    7792          20 :         esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen;
    7793          20 :         cluster_sz = spdk_bs_get_cluster_size(bs);
    7794          20 :         if ((esnap_dev_size % cluster_sz) != 0) {
    7795           4 :                 SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
    7796             :                             "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
    7797           4 :                 cb_fn(cb_arg, -EINVAL);
    7798           4 :                 return;
    7799             :         }
    7800             : 
    7801          16 :         ctx = calloc(1, sizeof(*ctx));
    7802          16 :         if (!ctx) {
    7803           0 :                 cb_fn(cb_arg, -ENOMEM);
    7804           0 :                 return;
    7805             :         }
    7806             : 
    7807          16 :         ctx->parent.u.esnap.id = calloc(1, esnap_id_len);
    7808          16 :         if (!ctx->parent.u.esnap.id) {
    7809           0 :                 free(ctx);
    7810           0 :                 cb_fn(cb_arg, -ENOMEM);
    7811           0 :                 return;
    7812             :         }
    7813             : 
    7814          16 :         ctx->bs = bs;
    7815          16 :         ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
    7816          16 :         memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len);
    7817          16 :         ctx->parent.u.esnap.id_len = esnap_id_len;
    7818          16 :         ctx->cb_fn = cb_fn;
    7819          16 :         ctx->cb_arg = cb_arg;
    7820          16 :         ctx->bserrno = 0;
    7821             : 
    7822          16 :         spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
    7823             : }
    7824             : /* END spdk_bs_blob_set_external_parent */
    7825             : 
    7826             : /* START spdk_blob_resize */
    7827             : struct spdk_bs_resize_ctx {
    7828             :         spdk_blob_op_complete cb_fn;
    7829             :         void *cb_arg;
    7830             :         struct spdk_blob *blob;
    7831             :         uint64_t sz;
    7832             :         int rc;
    7833             : };
    7834             : 
    7835             : static void
    7836         202 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
    7837             : {
    7838         202 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7839             : 
    7840         202 :         if (rc != 0) {
    7841           0 :                 SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
    7842             :         }
    7843             : 
    7844         202 :         if (ctx->rc != 0) {
    7845           4 :                 SPDK_ERRLOG("Unfreeze failed, ctx->rc=%d\n", ctx->rc);
    7846           4 :                 rc = ctx->rc;
    7847             :         }
    7848             : 
    7849         202 :         ctx->blob->locked_operation_in_progress = false;
    7850             : 
    7851         202 :         ctx->cb_fn(ctx->cb_arg, rc);
    7852         202 :         free(ctx);
    7853         202 : }
    7854             : 
    7855             : static void
    7856         202 : bs_resize_freeze_cpl(void *cb_arg, int rc)
    7857             : {
    7858         202 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7859             : 
    7860         202 :         if (rc != 0) {
    7861           0 :                 ctx->blob->locked_operation_in_progress = false;
    7862           0 :                 ctx->cb_fn(ctx->cb_arg, rc);
    7863           0 :                 free(ctx);
    7864           0 :                 return;
    7865             :         }
    7866             : 
    7867         202 :         ctx->rc = blob_resize(ctx->blob, ctx->sz);
    7868             : 
    7869         202 :         blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
    7870             : }
    7871             : 
    7872             : void
    7873         216 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
    7874             : {
    7875             :         struct spdk_bs_resize_ctx *ctx;
    7876             : 
    7877         216 :         blob_verify_md_op(blob);
    7878             : 
    7879         216 :         SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
    7880             : 
    7881         216 :         if (blob->md_ro) {
    7882           4 :                 cb_fn(cb_arg, -EPERM);
    7883           4 :                 return;
    7884             :         }
    7885             : 
    7886         212 :         if (sz == blob->active.num_clusters) {
    7887          10 :                 cb_fn(cb_arg, 0);
    7888          10 :                 return;
    7889             :         }
    7890             : 
    7891         202 :         if (blob->locked_operation_in_progress) {
    7892           0 :                 cb_fn(cb_arg, -EBUSY);
    7893           0 :                 return;
    7894             :         }
    7895             : 
    7896         202 :         ctx = calloc(1, sizeof(*ctx));
    7897         202 :         if (!ctx) {
    7898           0 :                 cb_fn(cb_arg, -ENOMEM);
    7899           0 :                 return;
    7900             :         }
    7901             : 
    7902         202 :         blob->locked_operation_in_progress = true;
    7903         202 :         ctx->cb_fn = cb_fn;
    7904         202 :         ctx->cb_arg = cb_arg;
    7905         202 :         ctx->blob = blob;
    7906         202 :         ctx->sz = sz;
    7907         202 :         blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
    7908             : }
    7909             : 
    7910             : /* END spdk_blob_resize */
    7911             : 
    7912             : 
    7913             : /* START spdk_bs_delete_blob */
    7914             : 
    7915             : static void
    7916        1488 : bs_delete_close_cpl(void *cb_arg, int bserrno)
    7917             : {
    7918        1488 :         spdk_bs_sequence_t *seq = cb_arg;
    7919             : 
    7920        1488 :         bs_sequence_finish(seq, bserrno);
    7921        1488 : }
    7922             : 
    7923             : static void
    7924        1488 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7925             : {
    7926        1488 :         struct spdk_blob *blob = cb_arg;
    7927             : 
    7928        1488 :         if (bserrno != 0) {
    7929             :                 /*
    7930             :                  * We already removed this blob from the blobstore tailq, so
    7931             :                  *  we need to free it here since this is the last reference
    7932             :                  *  to it.
    7933             :                  */
    7934           0 :                 blob_free(blob);
    7935           0 :                 bs_delete_close_cpl(seq, bserrno);
    7936           0 :                 return;
    7937             :         }
    7938             : 
    7939             :         /*
    7940             :          * This will immediately decrement the ref_count and call
    7941             :          *  the completion routine since the metadata state is clean.
    7942             :          *  By calling spdk_blob_close, we reduce the number of call
    7943             :          *  points into code that touches the blob->open_ref count
    7944             :          *  and the blobstore's blob list.
    7945             :          */
    7946        1488 :         spdk_blob_close(blob, bs_delete_close_cpl, seq);
    7947             : }
    7948             : 
    7949             : struct delete_snapshot_ctx {
    7950             :         struct spdk_blob_list *parent_snapshot_entry;
    7951             :         struct spdk_blob *snapshot;
    7952             :         struct spdk_blob_md_page *page;
    7953             :         bool snapshot_md_ro;
    7954             :         struct spdk_blob *clone;
    7955             :         bool clone_md_ro;
    7956             :         spdk_blob_op_with_handle_complete cb_fn;
    7957             :         void *cb_arg;
    7958             :         int bserrno;
    7959             :         uint32_t next_extent_page;
    7960             : };
    7961             : 
    7962             : static void
    7963         110 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
    7964             : {
    7965         110 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7966             : 
    7967         110 :         if (bserrno != 0) {
    7968           0 :                 SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
    7969             :         }
    7970             : 
    7971         110 :         assert(ctx != NULL);
    7972             : 
    7973         110 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7974           0 :                 ctx->bserrno = bserrno;
    7975             :         }
    7976             : 
    7977         110 :         ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
    7978         110 :         spdk_free(ctx->page);
    7979         110 :         free(ctx);
    7980         110 : }
    7981             : 
    7982             : static void
    7983          22 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
    7984             : {
    7985          22 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7986             : 
    7987          22 :         if (bserrno != 0) {
    7988           0 :                 ctx->bserrno = bserrno;
    7989           0 :                 SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
    7990             :         }
    7991             : 
    7992          22 :         if (ctx->bserrno != 0) {
    7993          22 :                 assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
    7994          22 :                 RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
    7995          22 :                 spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
    7996             :         }
    7997             : 
    7998          22 :         ctx->snapshot->locked_operation_in_progress = false;
    7999          22 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8000             : 
    8001          22 :         spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
    8002          22 : }
    8003             : 
    8004             : static void
    8005          12 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
    8006             : {
    8007          12 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8008             : 
    8009          12 :         ctx->clone->locked_operation_in_progress = false;
    8010          12 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8011             : 
    8012          12 :         spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8013          12 : }
    8014             : 
    8015             : static void
    8016          48 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    8017             : {
    8018          48 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8019             : 
    8020          48 :         if (bserrno) {
    8021           0 :                 ctx->bserrno = bserrno;
    8022           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8023           0 :                 return;
    8024             :         }
    8025             : 
    8026          48 :         ctx->clone->locked_operation_in_progress = false;
    8027          48 :         spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
    8028             : }
    8029             : 
    8030             : static void
    8031          52 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
    8032             : {
    8033          52 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8034          52 :         struct spdk_blob_list *parent_snapshot_entry = NULL;
    8035          52 :         struct spdk_blob_list *snapshot_entry = NULL;
    8036          52 :         struct spdk_blob_list *clone_entry = NULL;
    8037          52 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8038             : 
    8039          52 :         if (bserrno) {
    8040           4 :                 SPDK_ERRLOG("Failed to sync MD on blob\n");
    8041           4 :                 ctx->bserrno = bserrno;
    8042           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8043           4 :                 return;
    8044             :         }
    8045             : 
    8046             :         /* Get snapshot entry for the snapshot we want to remove */
    8047          48 :         snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
    8048             : 
    8049          48 :         assert(snapshot_entry != NULL);
    8050             : 
    8051             :         /* Remove clone entry in this snapshot (at this point there can be only one clone) */
    8052          48 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8053          48 :         assert(clone_entry != NULL);
    8054          48 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    8055          48 :         snapshot_entry->clone_count--;
    8056          48 :         assert(TAILQ_EMPTY(&snapshot_entry->clones));
    8057             : 
    8058          48 :         switch (ctx->snapshot->parent_id) {
    8059          40 :         case SPDK_BLOBID_INVALID:
    8060             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    8061             :                 /* No parent snapshot - just remove clone entry */
    8062          40 :                 free(clone_entry);
    8063          40 :                 break;
    8064           8 :         default:
    8065             :                 /* This snapshot is at the same time a clone of another snapshot - we need to
    8066             :                  * update parent snapshot (remove current clone, add new one inherited from
    8067             :                  * the snapshot that is being removed) */
    8068             : 
    8069             :                 /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8070             :                  * snapshot that we are removing */
    8071           8 :                 blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
    8072             :                                                     &snapshot_clone_entry);
    8073             : 
    8074             :                 /* Switch clone entry in parent snapshot */
    8075           8 :                 TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
    8076           8 :                 TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
    8077           8 :                 free(snapshot_clone_entry);
    8078             :         }
    8079             : 
    8080             :         /* Restore md_ro flags */
    8081          48 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8082          48 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8083             : 
    8084          48 :         blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
    8085             : }
    8086             : 
    8087             : static void
    8088          56 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
    8089             : {
    8090          56 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8091             :         uint64_t i;
    8092             : 
    8093          56 :         ctx->snapshot->md_ro = false;
    8094             : 
    8095          56 :         if (bserrno) {
    8096           4 :                 SPDK_ERRLOG("Failed to sync MD on clone\n");
    8097           4 :                 ctx->bserrno = bserrno;
    8098             : 
    8099             :                 /* Restore snapshot to previous state */
    8100           4 :                 bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8101           4 :                 if (bserrno != 0) {
    8102           0 :                         delete_snapshot_cleanup_clone(ctx, bserrno);
    8103           0 :                         return;
    8104             :                 }
    8105             : 
    8106           4 :                 spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8107           4 :                 return;
    8108             :         }
    8109             : 
    8110             :         /* Clear cluster map entries for snapshot */
    8111         552 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8112         500 :                 if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
    8113         492 :                         if (ctx->snapshot->active.clusters[i] != 0) {
    8114         328 :                                 ctx->snapshot->active.num_allocated_clusters--;
    8115             :                         }
    8116         492 :                         ctx->snapshot->active.clusters[i] = 0;
    8117             :                 }
    8118             :         }
    8119          78 :         for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
    8120          52 :              i < ctx->clone->active.num_extent_pages; i++) {
    8121          26 :                 if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
    8122          24 :                         ctx->snapshot->active.extent_pages[i] = 0;
    8123             :                 }
    8124             :         }
    8125             : 
    8126          52 :         blob_set_thin_provision(ctx->snapshot);
    8127          52 :         ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
    8128             : 
    8129          52 :         if (ctx->parent_snapshot_entry != NULL) {
    8130           8 :                 ctx->snapshot->back_bs_dev = NULL;
    8131             :         }
    8132             : 
    8133          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
    8134             : }
    8135             : /* Re-parent the clone (onto the external snapshot, the parent snapshot, or a zeroes dev when there is neither), then sync the clone's metadata. */
    8136             : static void
    8137          56 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
    8138             : {
    8139             :         int bserrno;
    8140             : 
    8141             :         /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
    8142          56 :         blob_back_bs_destroy(ctx->clone);
    8143             : 
    8144             :         /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
    8145          56 :         if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    8146           8 :                 bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
    8147             :                                                  BLOB_EXTERNAL_SNAPSHOT_ID);
    8148           8 :                 if (bserrno != 0) {
    8149           0 :                         ctx->bserrno = bserrno;
    8150             : 
    8151             :                         /* Restore snapshot to previous state */
    8152           0 :                         bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8153           0 :                         if (bserrno != 0) {
    8154           0 :                                 delete_snapshot_cleanup_clone(ctx, bserrno);
    8155           0 :                                 return;
    8156             :                         }
    8157             : 
    8158           0 :                         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8159           0 :                         return;
    8160             :                 }
    8161           8 :                 ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    8162           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8163             :                 /* Do not delete the external snapshot along with this snapshot */
    8164           8 :                 ctx->snapshot->back_bs_dev = NULL;
    8165           8 :                 ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    8166          48 :         } else if (ctx->parent_snapshot_entry != NULL) {
    8167             :                 /* ...to parent snapshot */
    8168           8 :                 ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
    8169           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8170           8 :                 blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
    8171             :                                sizeof(spdk_blob_id),
    8172             :                                true);
    8173             :         } else {
    8174             :                 /* ...to blobid invalid and zeroes dev */
    8175          40 :                 ctx->clone->parent_id = SPDK_BLOBID_INVALID;
    8176          40 :                 ctx->clone->back_bs_dev = bs_create_zeroes_dev();
    8177          40 :                 blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
    8178             :         }
    8179             :         /* NOTE(review): the blob_set_xattr()/blob_remove_xattr() results in the last two branches are discarded - confirm that is intended. */
    8180          56 :         spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
    8181             : }
    8182             : /* From ctx->next_extent_page on: copy snapshot extent pages into empty clone slots; where both have a page, rewrite the clone's page and resume from this same callback. */
    8183             : static void
    8184          58 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
    8185             : {
    8186          58 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8187             :         uint32_t *extent_page;
    8188             :         uint64_t i;
    8189             :         /* NOTE(review): bserrno is never read in this function, so an error passed to this callback is not acted on - confirm intended. */
    8190          84 :         for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
    8191          54 :              i < ctx->clone->active.num_extent_pages; i++) {
    8192          28 :                 if (ctx->snapshot->active.extent_pages[i] == 0) {
    8193             :                         /* No extent page to use from snapshot */
    8194           8 :                         continue;
    8195             :                 }
    8196             : 
    8197          20 :                 extent_page = &ctx->clone->active.extent_pages[i];
    8198          20 :                 if (*extent_page == 0) {
    8199             :                         /* Copy extent page from snapshot when clone did not have a matching one */
    8200          18 :                         *extent_page = ctx->snapshot->active.extent_pages[i];
    8201          18 :                         continue;
    8202             :                 }
    8203             : 
    8204             :                 /* Clone and snapshot both contain partially filled matching extent pages.
    8205             :                  * Update the clone extent page in place with cluster map containing the mix of both. */
    8206           2 :                 ctx->next_extent_page = i + 1;
    8207           2 :                 memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
    8208             :                 /* The write completes back into this function, which continues the loop at ctx->next_extent_page. */
    8209           2 :                 blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
    8210             :                                        delete_snapshot_update_extent_pages, ctx);
    8211           2 :                 return;
    8212             :         }
    8213          56 :         delete_snapshot_update_extent_pages_cpl(ctx);
    8214             : }
    8215             : /* Completion of the snapshot metadata sync: make the clone's MD writable, fill the clone's unallocated cluster slots from the snapshot, then start the extent page pass. */
    8216             : static void
    8217          60 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
    8218             : {
    8219          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8220             :         uint64_t i;
    8221             : 
    8222             :         /* Temporarily override md_ro flag for clone for MD modification */
    8223          60 :         ctx->clone_md_ro = ctx->clone->md_ro;
    8224          60 :         ctx->clone->md_ro = false;
    8225             :         /* The override above is applied before the error check, so the failure path also runs with clone_md_ro saved. */
    8226          60 :         if (bserrno) {
    8227           4 :                 SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
    8228           4 :                 ctx->bserrno = bserrno;
    8229           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8230           4 :                 return;
    8231             :         }
    8232             : 
    8233             :         /* Copy snapshot map to clone map (only unallocated clusters in clone) */
    8234         596 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8235         540 :                 if (ctx->clone->active.clusters[i] == 0) {
    8236         532 :                         ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
    8237         532 :                         if (ctx->clone->active.clusters[i] != 0) {
    8238         368 :                                 ctx->clone->active.num_allocated_clusters++;
    8239             :                         }
    8240             :                 }
    8241             :         }
    8242          56 :         ctx->next_extent_page = 0;
    8243          56 :         delete_snapshot_update_extent_pages(ctx, 0);
    8244             : }
    8245             : /* Callback passed to blob_esnap_destroy_bs_dev_channels(): log a failure if any, then sync the snapshot's metadata either way. */
    8246             : static void
    8247           8 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8248             : {
    8249           8 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8250             : 
    8251           8 :         if (bserrno != 0) {
    8252           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
    8253             :                             blob->id, bserrno);
    8254             :                 /* That error should not stop us from syncing metadata. */
    8255             :         }
    8256             : 
    8257           8 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8258           8 : }
    8259             : /* Callback of blob_freeze_io() on the clone: make the snapshot's MD writable, tag it SNAPSHOT_PENDING_REMOVAL with the clone id, then sync its metadata. */
    8260             : static void
    8261          60 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
    8262             : {
    8263          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8264             : 
    8265          60 :         if (bserrno) {
    8266           0 :                 SPDK_ERRLOG("Failed to freeze I/O on clone\n");
    8267           0 :                 ctx->bserrno = bserrno;
    8268           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8269           0 :                 return;
    8270             :         }
    8271             : 
    8272             :         /* Temporarily override md_ro flag for snapshot for MD modification */
    8273          60 :         ctx->snapshot_md_ro = ctx->snapshot->md_ro;
    8274          60 :         ctx->snapshot->md_ro = false;
    8275             : 
    8276             :         /* Mark blob as pending for removal for power failure safety, use clone id for recovery */
    8277          60 :         ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
    8278             :                                       sizeof(spdk_blob_id), true);
    8279          60 :         if (ctx->bserrno != 0) {
    8280           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8281           0 :                 return;
    8282             :         }
    8283             :         /* For an esnap clone the metadata sync is issued from the channel-destroy callback instead of from here. */
    8284          60 :         if (blob_is_esnap_clone(ctx->snapshot)) {
    8285           8 :                 blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
    8286             :                                                    delete_snapshot_esnap_channels_destroyed_cb,
    8287             :                                                    ctx);
    8288           8 :                 return;
    8289             :         }
    8290             : 
    8291          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8292             : }
    8293             : /* Open callback for the snapshot's clone: store it in ctx, claim its locked_operation_in_progress flag (or fail with -EBUSY), then freeze its I/O. */
    8294             : static void
    8295          70 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
    8296             : {
    8297          70 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8298             : 
    8299          70 :         if (bserrno) {
    8300          10 :                 SPDK_ERRLOG("Failed to open clone\n");
    8301          10 :                 ctx->bserrno = bserrno;
    8302          10 :                 delete_snapshot_cleanup_snapshot(ctx, 0);
    8303          10 :                 return;
    8304             :         }
    8305             : 
    8306          60 :         ctx->clone = clone;
    8307             :         /* If the clone is already locked, close it again; the close callback is the snapshot cleanup, with -EBUSY stored in ctx->bserrno. */
    8308          60 :         if (clone->locked_operation_in_progress) {
    8309           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
    8310           0 :                 ctx->bserrno = -EBUSY;
    8311           0 :                 spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8312           0 :                 return;
    8313             :         }
    8314             : 
    8315          60 :         clone->locked_operation_in_progress = true;
    8316             : 
    8317          60 :         blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
    8318             : }
    8319             : /* Find the single clone of the snapshot being deleted, record the parent snapshot entry in ctx, and open that clone to begin updating it. */
    8320             : static void
    8321          70 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
    8322             : {
    8323          70 :         struct spdk_blob_list *snapshot_entry = NULL;
    8324          70 :         struct spdk_blob_list *clone_entry = NULL;
    8325          70 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8326             :         /* NOTE(review): snapshot_clone_entry is filled in below but not read again in this function. */
    8327             :         /* Get snapshot entry for the snapshot we want to remove */
    8328          70 :         snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
    8329             : 
    8330          70 :         assert(snapshot_entry != NULL);
    8331             : 
    8332             :         /* Get clone of the snapshot (at this point there can be only one clone) */
    8333          70 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8334          70 :         assert(snapshot_entry->clone_count == 1);
    8335          70 :         assert(clone_entry != NULL);
    8336             : 
    8337             :         /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8338             :          * snapshot that we are removing */
    8339          70 :         blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
    8340             :                                             &snapshot_clone_entry);
    8341             : 
    8342          70 :         spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
    8343          70 : }
    8344             : /* Last delete step: on error finish the sequence with bserrno; otherwise drop the blob's snapshot-list entry and used-blobid bit, resize it to 0 and persist. */
    8345             : static void
    8346        1550 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8347             : {
    8348        1550 :         spdk_bs_sequence_t *seq = cb_arg;
    8349        1550 :         struct spdk_blob_list *snapshot_entry = NULL;
    8350             :         uint32_t page_num;
    8351             : 
    8352        1550 :         if (bserrno) {
    8353          62 :                 SPDK_ERRLOG("Failed to remove blob\n");
    8354          62 :                 bs_sequence_finish(seq, bserrno);
    8355          62 :                 return;
    8356             :         }
    8357             : 
    8358             :         /* Remove snapshot from the list */
    8359        1488 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8360        1488 :         if (snapshot_entry != NULL) {
    8361         140 :                 TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
    8362         140 :                 free(snapshot_entry);
    8363             :         }
    8364             :         /* NOTE(review): the blob_resize() return value below is not checked - confirm a resize to 0 cannot fail. */
    8365        1488 :         page_num = bs_blobid_to_page(blob->id);
    8366        1488 :         spdk_bit_array_clear(blob->bs->used_blobids, page_num);
    8367        1488 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8368        1488 :         blob->active.num_pages = 0;
    8369        1488 :         blob_resize(blob, 0);
    8370             : 
    8371        1488 :         blob_persist(seq, blob, bs_delete_persist_cpl, blob);
    8372             : }
    8373             : /* Decide whether blob may be deleted: returns 0 and sets *update_clone, or -EBUSY (leaving *update_clone untouched) if it has >1 clone or is open elsewhere. */
    8374             : static int
    8375        1550 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
    8376             : {
    8377        1550 :         struct spdk_blob_list *snapshot_entry = NULL;
    8378        1550 :         struct spdk_blob_list *clone_entry = NULL;
    8379        1550 :         struct spdk_blob *clone = NULL;
    8380        1550 :         bool has_one_clone = false;
    8381             : 
    8382             :         /* Check if this is a snapshot with clones */
    8383        1550 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8384        1550 :         if (snapshot_entry != NULL) {
    8385         190 :                 if (snapshot_entry->clone_count > 1) {
    8386          24 :                         SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
    8387          24 :                         return -EBUSY;
    8388         166 :                 } else if (snapshot_entry->clone_count == 1) {
    8389          70 :                         has_one_clone = true;
    8390             :                 }
    8391             :         }
    8392             : 
    8393             :         /* Check if someone has this blob open (besides this delete context):
    8394             :          * - open_ref = 1 - only this context opened blob, so it is ok to remove it
    8395             :          * - open_ref <= 2 && has_one_clone = true - clone is holding snapshot
    8396             :          *      and that is ok, because we will update it accordingly */
    8397        1526 :         if (blob->open_ref <= 2 && has_one_clone) {
    8398          70 :                 clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8399          70 :                 assert(clone_entry != NULL);
    8400          70 :                 clone = blob_lookup(blob->bs, clone_entry->id);
    8401             : 
    8402          70 :                 if (blob->open_ref == 2 && clone == NULL) {
    8403             :                         /* Clone is closed and someone else opened this blob */
    8404           0 :                         SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8405           0 :                         return -EBUSY;
    8406             :                 }
    8407             : 
    8408          70 :                 *update_clone = true;
    8409          70 :                 return 0;
    8410             :         }
    8411             :         /* Reached when there is no single clone, or when there is one but open_ref > 2; the latter is rejected by the check below. */
    8412        1456 :         if (blob->open_ref > 1) {
    8413          16 :                 SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8414          16 :                 return -EBUSY;
    8415             :         }
    8416             : 
    8417        1440 :         assert(has_one_clone == false);
    8418        1440 :         *update_clone = false;
    8419        1440 :         return 0;
    8420             : }
    8421             : /* Close callback used when the delete context could not be allocated: finish the sequence with -ENOMEM; the close status in bserrno is not examined. */
    8422             : static void
    8423           0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
    8424             : {
    8425           0 :         spdk_bs_sequence_t *seq = cb_arg;
    8426             : 
    8427           0 :         bs_sequence_finish(seq, -ENOMEM);
    8428           0 : }
    8429             : /* Open callback for spdk_bs_delete_blob(): check the blob is deletable, lock it and unlist it from the open blobs, then update its clone first or delete it directly. */
    8430             : static void
    8431        1560 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8432             : {
    8433        1560 :         spdk_bs_sequence_t *seq = cb_arg;
    8434             :         struct delete_snapshot_ctx *ctx;
    8435        1560 :         bool update_clone = false;
    8436             : 
    8437        1560 :         if (bserrno != 0) {
    8438          10 :                 bs_sequence_finish(seq, bserrno);
    8439          10 :                 return;
    8440             :         }
    8441             : 
    8442        1550 :         blob_verify_md_op(blob);
    8443             : 
    8444        1550 :         ctx = calloc(1, sizeof(*ctx));
    8445        1550 :         if (ctx == NULL) {
    8446           0 :                 spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
    8447           0 :                 return;
    8448             :         }
    8449             : 
    8450        1550 :         ctx->snapshot = blob;
    8451        1550 :         ctx->cb_fn = bs_delete_blob_finish;
    8452        1550 :         ctx->cb_arg = seq;
    8453             : 
    8454             :         /* Check if blob can be removed and if it is a snapshot with clone on top of it */
    8455        1550 :         ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
    8456        1550 :         if (ctx->bserrno) {
    8457          40 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8458          40 :                 return;
    8459             :         }
    8460             : 
    8461        1510 :         if (blob->locked_operation_in_progress) {
    8462           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
    8463           0 :                 ctx->bserrno = -EBUSY;
    8464           0 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8465           0 :                 return;
    8466             :         }
    8467             : 
    8468        1510 :         blob->locked_operation_in_progress = true;
    8469             : 
    8470             :         /*
    8471             :          * Remove the blob from the blob_store list now, to ensure it does not
    8472             :          *  get returned after this point by blob_lookup().
    8473             :          */
    8474        1510 :         spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    8475        1510 :         RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8476             :         /* NOTE(review): the -ENOMEM path below closes a blob that is already unlisted and marked locked - confirm the close/cleanup path copes with that. */
    8477        1510 :         if (update_clone) {
    8478          70 :                 ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    8479          70 :                 if (!ctx->page) {
    8480           0 :                         ctx->bserrno = -ENOMEM;
    8481           0 :                         spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8482           0 :                         return;
    8483             :                 }
    8484             :                 /* This blob is a snapshot with active clone - update clone first */
    8485          70 :                 update_clone_on_snapshot_deletion(blob, ctx);
    8486             :         } else {
    8487             :                 /* This blob does not have any clones - just remove it */
    8488        1440 :                 bs_blob_list_remove(blob);
    8489        1440 :                 bs_delete_blob_finish(seq, blob, 0);
    8490        1440 :                 free(ctx);
    8491             :         }
    8492             : }
    8493             : /* Public entry point: start a sequence for deleting blobid and open the blob; calls cb_fn(cb_arg, -ENOMEM) directly if no sequence can be started. */
    8494             : void
    8495        1560 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8496             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    8497             : {
    8498        1560 :         struct spdk_bs_cpl      cpl;
    8499             :         spdk_bs_sequence_t      *seq;
    8500             : 
    8501        1560 :         SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
    8502             :         /* The caller is expected to be on the blobstore's metadata thread (only enforced by this assert). */
    8503        1560 :         assert(spdk_get_thread() == bs->md_thread);
    8504             : 
    8505        1560 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8506        1560 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8507        1560 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8508             : 
    8509        1560 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8510        1560 :         if (!seq) {
    8511           0 :                 cb_fn(cb_arg, -ENOMEM);
    8512           0 :                 return;
    8513             :         }
    8514             : 
    8515        1560 :         spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
    8516             : }
    8517             : 
    8518             : /* END spdk_bs_delete_blob */
    8519             : 
    8520             : /* START spdk_bs_open_blob */
    8521             : /* Load completion for bs_open_blob(): on error free the blob; otherwise hand back an already-open instance if blob_lookup() finds one, else register this blob as open. */
    8522             : static void
    8523        3466 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8524             : {
    8525        3466 :         struct spdk_blob *blob = cb_arg;
    8526             :         struct spdk_blob *existing;
    8527             : 
    8528        3466 :         if (bserrno != 0) {
    8529          64 :                 blob_free(blob);
    8530          64 :                 seq->cpl.u.blob_handle.blob = NULL;
    8531          64 :                 bs_sequence_finish(seq, bserrno);
    8532          64 :                 return;
    8533             :         }
    8534             :         /* A blob with this id may have been registered as open while this load was in flight; prefer it and discard the fresh copy. */
    8535        3402 :         existing = blob_lookup(blob->bs, blob->id);
    8536        3402 :         if (existing) {
    8537           4 :                 blob_free(blob);
    8538           4 :                 existing->open_ref++;
    8539           4 :                 seq->cpl.u.blob_handle.blob = existing;
    8540           4 :                 bs_sequence_finish(seq, 0);
    8541           4 :                 return;
    8542             :         }
    8543             : 
    8544        3398 :         blob->open_ref++;
    8545             : 
    8546        3398 :         spdk_bit_array_set(blob->bs->open_blobids, blob->id);
    8547        3398 :         RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8548             : 
    8549        3398 :         bs_sequence_finish(seq, bserrno);
    8550             : }
    8551             : /* Copy into dst each known field of src that lies entirely within src->opts_size, then copy opts_size itself; other dst fields are left as they were. */
    8552             : static inline void
    8553           4 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
    8554             : {
    8555             : #define FIELD_OK(field) \
    8556             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
    8557             : 
    8558             : #define SET_FIELD(field) \
    8559             :         if (FIELD_OK(field)) { \
    8560             :                 dst->field = src->field; \
    8561             :         } \
    8562             : 
    8563           4 :         SET_FIELD(clear_method);
    8564           4 :         SET_FIELD(esnap_ctx);
    8565             : 
    8566           4 :         dst->opts_size = src->opts_size;
    8567             : 
    8568             :         /* You should not remove this statement, but need to update the assert statement
    8569             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    8570             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
    8571             : 
    8572             : #undef FIELD_OK
    8573             : #undef SET_FIELD
    8574           4 : }
    8575             : /* Open blobid and report via cb_fn: -ENOENT if the id is unused, the existing handle (open_ref bumped) if already open, otherwise allocate a blob and load it. */
    8576             : static void
    8577        4263 : bs_open_blob(struct spdk_blob_store *bs,
    8578             :              spdk_blob_id blobid,
    8579             :              struct spdk_blob_open_opts *opts,
    8580             :              spdk_blob_op_with_handle_complete cb_fn,
    8581             :              void *cb_arg)
    8582             : {
    8583             :         struct spdk_blob                *blob;
    8584        4263 :         struct spdk_bs_cpl              cpl;
    8585        4263 :         struct spdk_blob_open_opts      opts_local;
    8586             :         spdk_bs_sequence_t              *seq;
    8587             :         uint32_t                        page_num;
    8588             : 
    8589        4263 :         SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
    8590        4263 :         assert(spdk_get_thread() == bs->md_thread);
    8591             : 
    8592        4263 :         page_num = bs_blobid_to_page(blobid);
    8593        4263 :         if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
    8594             :                 /* Invalid blobid */
    8595          48 :                 cb_fn(cb_arg, NULL, -ENOENT);
    8596          48 :                 return;
    8597             :         }
    8598             :         /* Already open: opts is not consulted on this path, the existing handle is returned as is. */
    8599        4215 :         blob = blob_lookup(bs, blobid);
    8600        4215 :         if (blob) {
    8601         749 :                 blob->open_ref++;
    8602         749 :                 cb_fn(cb_arg, blob, 0);
    8603         749 :                 return;
    8604             :         }
    8605             : 
    8606        3466 :         blob = blob_alloc(bs, blobid);
    8607        3466 :         if (!blob) {
    8608           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8609           0 :                 return;
    8610             :         }
    8611             :         /* Start from initialized defaults, then overlay whatever fields the caller's opts provides. */
    8612        3466 :         spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
    8613        3466 :         if (opts) {
    8614           4 :                 blob_open_opts_copy(opts, &opts_local);
    8615             :         }
    8616             : 
    8617        3466 :         blob->clear_method = opts_local.clear_method;
    8618             : 
    8619        3466 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
    8620        3466 :         cpl.u.blob_handle.cb_fn = cb_fn;
    8621        3466 :         cpl.u.blob_handle.cb_arg = cb_arg;
    8622        3466 :         cpl.u.blob_handle.blob = blob;
    8623        3466 :         cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
    8624             : 
    8625        3466 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8626        3466 :         if (!seq) {
    8627           0 :                 blob_free(blob);
    8628           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8629           0 :                 return;
    8630             :         }
    8631             : 
    8632        3466 :         blob_load(seq, blob, bs_open_blob_cpl, blob);
    8633             : }
    8634             : /* Open a blob with default open options: forwards to bs_open_blob() with opts == NULL. */
    8635             : void
    8636        4259 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8637             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8638             : {
    8639        4259 :         bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
    8640        4259 : }
    8641             : /* Open a blob with caller-supplied open options: forwards opts unchanged to bs_open_blob(). */
    8642             : void
    8643           4 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8644             :                       struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8645             : {
    8646           4 :         bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
    8647           4 : }
    8648             : 
    8649             : /* END spdk_bs_open_blob */
    8650             : /* spdk_blob_set_read_only(): set SPDK_BLOB_READ_ONLY in the blob's data_ro_flags and mark the blob dirty; always returns 0. */
    8651             : /* START spdk_blob_set_read_only */
    8652             : int
    8653         232 : spdk_blob_set_read_only(struct spdk_blob *blob)
    8654             : {
    8655         232 :         blob_verify_md_op(blob);
    8656             :         /* Only the flag is recorded here; data_ro/md_ro are not changed by this function. */
    8657         232 :         blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
    8658             : 
    8659         232 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8660         232 :         return 0;
    8661             : }
    8662             : /* END spdk_blob_set_read_only */
    8663             : 
    8664             : /* START spdk_blob_sync_md */
    8665             : /* Persist completion for blob_sync_md(): after a successful persist of a blob flagged SPDK_BLOB_READ_ONLY, set data_ro and md_ro; then finish the sequence. */
    8666             : static void
    8667        1591 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8668             : {
    8669        1591 :         struct spdk_blob *blob = cb_arg;
    8670             : 
    8671        1591 :         if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
    8672         400 :                 blob->data_ro = true;
    8673         400 :                 blob->md_ro = true;
    8674             :         }
    8675             : 
    8676        1591 :         bs_sequence_finish(seq, bserrno);
    8677        1591 : }
    8678             : /* Start a sequence on the blobstore's md_channel and persist the blob; calls cb_fn(cb_arg, -ENOMEM) directly if the sequence cannot be started. */
    8679             : static void
    8680        1591 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8681             : {
    8682        1591 :         struct spdk_bs_cpl      cpl;
    8683             :         spdk_bs_sequence_t      *seq;
    8684             : 
    8685        1591 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8686        1591 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8687        1591 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8688             : 
    8689        1591 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8690        1591 :         if (!seq) {
    8691           0 :                 cb_fn(cb_arg, -ENOMEM);
    8692           0 :                 return;
    8693             :         }
    8694             : 
    8695        1591 :         blob_persist(seq, blob, blob_sync_md_cpl, blob);
    8696             : }
    8697             : 
    /*
     * Public entry point for syncing a blob's metadata.
     *
     * If the blob's metadata is read-only (md_ro) nothing is persisted: the blob
     * is asserted to be in SPDK_BLOB_STATE_CLEAN and cb_fn is called immediately
     * with 0. Otherwise the work is delegated to blob_sync_md().
     */
    8698             : void
    8699        1081 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8700             : {
    8701        1081 :         blob_verify_md_op(blob);
    8702             : 
    8703        1081 :         SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
    8704             : 
    8705        1081 :         if (blob->md_ro) {
    8706           4 :                 assert(blob->state == SPDK_BLOB_STATE_CLEAN);
    8707           4 :                 cb_fn(cb_arg, 0);
    8708           4 :                 return;
    8709             :         }
    8710             : 
    8711        1077 :         blob_sync_md(blob, cb_fn, cb_arg);
    8712             : }
    8713             : 
    8714             : /* END spdk_blob_sync_md */
    8715             : 
    /*
     * Per-request context for inserting a cluster into, or freeing a cluster
     * from, a blob. It is allocated by blob_insert_cluster_on_md_thread() /
     * blob_free_cluster_on_md_thread(), passed to the md thread with
     * spdk_thread_send_msg(), and freed in blob_op_cluster_msg_cpl().
     *
     * NOTE(review): 'cluster' is uint32_t, while
     * blob_insert_cluster_on_md_thread() receives the cluster as uint64_t and
     * blob_free_cluster_msg() assigns it from bs_lba_to_cluster(); confirm
     * that cluster indexes always fit in 32 bits.
     */
    8716             : struct spdk_blob_cluster_op_ctx {
                       /* Thread that submitted the request; the completion message is sent back to it. */
    8717             :         struct spdk_thread      *thread;
    8718             :         struct spdk_blob        *blob;
    8719             :         uint32_t                cluster_num;    /* cluster index in blob */
    8720             :         uint32_t                cluster;        /* cluster on disk */
    8721             :         uint32_t                extent_page;    /* extent page on disk */
    8722             :         struct spdk_blob_md_page *page; /* preallocated extent page */
                       /* Status handed to cb_fn by blob_op_cluster_msg_cpl(). */
    8723             :         int                     rc;
    8724             :         spdk_blob_op_complete   cb_fn;
    8725             :         void                    *cb_arg;
    8726             : };
    8727             : 
    /*
     * Final step of a cluster insert/free request. It is sent as a message to
     * ctx->thread (the thread recorded when the request was created), invokes
     * the user callback with ctx->rc and frees the context.
     */
    8728             : static void
    8729         876 : blob_op_cluster_msg_cpl(void *arg)
    8730             : {
    8731         876 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8732             : 
    8733         876 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    8734         876 :         free(ctx);
    8735         876 : }
    8736             : 
    /*
     * spdk_blob_op_complete-style callback used by the cluster insert/free
     * paths: stores bserrno in ctx->rc and posts blob_op_cluster_msg_cpl() to
     * the requesting thread, which calls the user callback and frees ctx.
     */
    8737             : static void
    8738         846 : blob_op_cluster_msg_cb(void *arg, int bserrno)
    8739             : {
    8740         846 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8741             : 
    8742         846 :         ctx->rc = bserrno;
    8743         846 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8744         846 : }
    8745             : 
    8746             : static void
    8747          82 : blob_insert_new_ep_cb(void *arg, int bserrno)
    8748             : {
    8749          82 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8750             :         uint32_t *extent_page;
    8751             : 
    8752          82 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8753          82 :         *extent_page = ctx->extent_page;
    8754          82 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8755          82 :         blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8756          82 : }
    8757             : 
    /*
     * Context for a single extent page write. Allocated in
     * blob_write_extent_page() and freed in blob_persist_extent_page_cpl().
     */
    8758             : struct spdk_blob_write_extent_page_ctx {
    8759             :         struct spdk_blob_store          *bs;
    8760             : 
                       /* Metadata page index the buffer is written to (converted with bs_md_page_to_lba()). */
    8761             :         uint32_t                        extent;
                       /* Buffer holding the serialized extent page. */
    8762             :         struct spdk_blob_md_page        *page;
    8763             : };
    8764             : 
    /*
     * Last step of a cluster free on the extent-table path: releases
     * ctx->cluster under bs->used_lock, records bserrno in ctx->rc and posts
     * blob_op_cluster_msg_cpl() to the requesting thread.
     *
     * NOTE(review): the cluster is released even when bserrno is non-zero;
     * confirm that is intended for failed extent page / metadata writes.
     */
    8765             : static void
    8766          26 : blob_free_cluster_msg_cb(void *arg, int bserrno)
    8767             : {
    8768          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8769             : 
    8770          26 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8771          26 :         bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8772          26 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8773             : 
    8774          26 :         ctx->rc = bserrno;
    8775          26 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8776          26 : }
    8777             : 
    /*
     * Completion of the extent page rewrite issued by blob_free_cluster_msg()
     * when the extent page is still in use by other clusters.
     *
     * If the write failed, or bs->clean is 0, the blob metadata sync is skipped
     * and the request goes straight to blob_free_cluster_msg_cb(). Otherwise the
     * blob is marked dirty and synced first, with blob_free_cluster_msg_cb() as
     * the completion.
     */
    8778             : static void
    8779          26 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
    8780             : {
    8781          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8782             : 
    8783          26 :         if (bserrno != 0 || ctx->blob->bs->clean == 0) {
    8784          26 :                 blob_free_cluster_msg_cb(ctx, bserrno);
    8785          26 :                 return;
    8786             :         }
    8787             : 
    8788           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8789           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8790             : }
    8791             : 
    /*
     * Completion of the extent page write issued by blob_free_cluster_msg()
     * when no cluster in the extent page remains allocated. Releases the extent
     * page's metadata page under bs->used_lock, marks the blob dirty and syncs
     * its metadata, completing through blob_free_cluster_msg_cb().
     *
     * NOTE(review): bserrno from the extent page write is not inspected here;
     * the md page is released and the sync is issued even if the write failed,
     * and only the sync's status reaches the requester.
     */
    8792             : static void
    8793           0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
    8794             : {
    8795           0 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8796             : 
    8797           0 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8798           0 :         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8799           0 :         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8800           0 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8801           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8802           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8803           0 : }
    8804             : 
    /*
     * Completion of the extent page write: frees the write context allocated by
     * blob_write_extent_page() and finishes the sequence with bserrno.
     */
    8805             : static void
    8806         434 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8807             : {
    8808         434 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8809             : 
    8810         434 :         free(ctx);
    8811         434 :         bs_sequence_finish(seq, bserrno);
    8812         434 : }
    8813             : 
    /*
     * Continuation of bs_mark_dirty() from blob_write_extent_page().
     *
     * On error the write is skipped and the sequence is completed with that
     * error via blob_persist_extent_page_cpl(). Otherwise ctx->page is written
     * to the LBA of metadata page ctx->extent; the length is SPDK_BS_PAGE_SIZE
     * bytes converted to LBAs.
     */
    8814             : static void
    8815         434 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8816             : {
    8817         434 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8818             : 
    8819         434 :         if (bserrno != 0) {
    8820           0 :                 blob_persist_extent_page_cpl(seq, ctx, bserrno);
    8821           0 :                 return;
    8822             :         }
    8823         434 :         bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
    8824         434 :                               bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    8825             :                               blob_persist_extent_page_cpl, ctx);
    8826             : }
    8827             : 
    /*
     * Serialize one extent page of the blob into 'page' and write it to the
     * metadata page with index 'extent'.
     *
     * blob        - blob owning the extent page.
     * extent      - metadata page index to write to; asserted to be set in
     *               bs->used_md_pages.
     * cluster_num - cluster index passed to blob_serialize_extent_page().
     * page        - caller-provided buffer; asserted non-NULL and overwritten.
     * cb_fn/cb_arg - stored in the BLOB_BASIC completion of the sequence.
     *
     * If the context or the sequence cannot be allocated, cb_fn is called
     * directly with -ENOMEM and nothing is written. The device write itself is
     * issued from blob_write_extent_page_ready() once bs_mark_dirty() completes.
     */
    8828             : static void
    8829         434 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
    8830             :                        struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    8831             : {
    8832             :         struct spdk_blob_write_extent_page_ctx  *ctx;
    8833             :         spdk_bs_sequence_t                      *seq;
    8834         434 :         struct spdk_bs_cpl                      cpl;
    8835             : 
    8836         434 :         ctx = calloc(1, sizeof(*ctx));
    8837         434 :         if (!ctx) {
    8838           0 :                 cb_fn(cb_arg, -ENOMEM);
    8839           0 :                 return;
    8840             :         }
    8841         434 :         ctx->bs = blob->bs;
    8842         434 :         ctx->extent = extent;
    8843         434 :         ctx->page = page;
    8844             : 
    8845         434 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8846         434 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8847         434 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8848             : 
    8849         434 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8850         434 :         if (!seq) {
    8851           0 :                 free(ctx);
    8852           0 :                 cb_fn(cb_arg, -ENOMEM);
    8853           0 :                 return;
    8854             :         }
    8855             : 
                               /* Fill the page header: no next page in the chain, owner blob id, sequence 0. */
    8856         434 :         assert(page);
    8857         434 :         page->next = SPDK_INVALID_MD_PAGE;
    8858         434 :         page->id = blob->id;
    8859         434 :         page->sequence_num = 0;
    8860             : 
    8861         434 :         blob_serialize_extent_page(blob, cluster_num, page);
    8862             : 
                               /* The CRC is computed after serialization so that it covers the final contents. */
    8863         434 :         page->crc = blob_md_page_calc_crc(page);
    8864             : 
    8865         434 :         assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
    8866             : 
    8867         434 :         bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
    8868             : }
    8869             : 
    /*
     * Message handler for a cluster insert, sent to blob->bs->md_thread by
     * blob_insert_cluster_on_md_thread().
     *
     * Inserts ctx->cluster at index ctx->cluster_num with blob_insert_cluster().
     * If that fails the error is sent straight back to the requesting thread.
     * Otherwise the metadata is updated in one of three ways:
     *  - no extent table: mark the blob dirty and sync its metadata;
     *  - extent table, extent page slot is 0: write the preallocated extent
     *    page ctx->extent_page and continue in blob_insert_new_ep_cb();
     *  - extent table, slot already set: release the spare preallocated page
     *    (if any) and rewrite the existing extent page.
     */
    8870             : static void
    8871         816 : blob_insert_cluster_msg(void *arg)
    8872             : {
    8873         816 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8874             :         uint32_t *extent_page;
    8875             : 
    8876         816 :         ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
    8877         816 :         if (ctx->rc != 0) {
    8878           4 :                 spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8879           4 :                 return;
    8880             :         }
    8881             : 
    8882         812 :         if (ctx->blob->use_extent_table == false) {
    8883             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8884         406 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8885         406 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8886         406 :                 return;
    8887             :         }
    8888             : 
    8889         406 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8890         406 :         if (*extent_page == 0) {
    8891             :                 /* Extent page requires allocation.
    8892             :                  * It was already claimed in the used_md_pages map and placed in ctx. */
    8893          82 :                 assert(ctx->extent_page != 0);
    8894          82 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8895          82 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    8896             :                                        blob_insert_new_ep_cb, ctx);
    8897             :         } else {
    8898             :                 /* It is possible for original thread to allocate extent page for
    8899             :                  * different cluster in the same extent page. In such case proceed with
    8900             :                  * updating the existing extent page, but release the additional one. */
    8901         324 :                 if (ctx->extent_page != 0) {
    8902           0 :                         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8903           0 :                         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8904           0 :                         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8905           0 :                         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8906           0 :                         ctx->extent_page = 0;
    8907             :                 }
    8908             :                 /* Extent page already allocated.
    8909             :                  * Every cluster allocation, requires just an update of single extent page. */
    8910         324 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    8911             :                                        blob_op_cluster_msg_cb, ctx);
    8912             :         }
    8913             : }
    8914             : 
    /*
     * Request insertion of 'cluster' at index 'cluster_num' of the blob on the
     * blobstore's md_thread.
     *
     * The request is packaged in a spdk_blob_cluster_op_ctx that records the
     * calling thread (spdk_get_thread()) and is sent to
     * blob_insert_cluster_msg(). cb_fn is later invoked on the recorded thread
     * by blob_op_cluster_msg_cpl(). If the context cannot be allocated, cb_fn is
     * called directly with -ENOMEM.
     *
     * extent_page / page - preallocated extent page index and buffer; see
     *                      blob_insert_cluster_msg() for how they are used.
     */
    8915             : static void
    8916         816 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
    8917             :                                  uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
    8918             :                                  spdk_blob_op_complete cb_fn, void *cb_arg)
    8919             : {
    8920             :         struct spdk_blob_cluster_op_ctx *ctx;
    8921             : 
    8922         816 :         ctx = calloc(1, sizeof(*ctx));
    8923         816 :         if (ctx == NULL) {
    8924           0 :                 cb_fn(cb_arg, -ENOMEM);
    8925           0 :                 return;
    8926             :         }
    8927             : 
    8928         816 :         ctx->thread = spdk_get_thread();
    8929         816 :         ctx->blob = blob;
    8930         816 :         ctx->cluster_num = cluster_num;
    8931         816 :         ctx->cluster = cluster;
    8932         816 :         ctx->extent_page = extent_page;
    8933         816 :         ctx->page = page;
    8934         816 :         ctx->cb_fn = cb_fn;
    8935         816 :         ctx->cb_arg = cb_arg;
    8936             : 
    8937         816 :         spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
    8938             : }
    8939             : 
    8940             : static void
    8941          60 : blob_free_cluster_msg(void *arg)
    8942             : {
    8943          60 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8944             :         uint32_t *extent_page;
    8945             :         uint32_t start_cluster_idx;
    8946          60 :         bool free_extent_page = true;
    8947             :         size_t i;
    8948             : 
    8949          60 :         ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
    8950             : 
    8951             :         /* There were concurrent unmaps to the same cluster, only release the cluster on the first one */
    8952          60 :         if (ctx->cluster == 0) {
    8953           8 :                 blob_op_cluster_msg_cb(ctx, 0);
    8954           8 :                 return;
    8955             :         }
    8956             : 
    8957          52 :         ctx->blob->active.clusters[ctx->cluster_num] = 0;
    8958          52 :         if (ctx->cluster != 0) {
    8959          52 :                 ctx->blob->active.num_allocated_clusters--;
    8960             :         }
    8961             : 
    8962          52 :         if (ctx->blob->use_extent_table == false) {
    8963             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8964          26 :                 spdk_spin_lock(&ctx->blob->bs->used_lock);
    8965          26 :                 bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8966          26 :                 spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8967          26 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8968          26 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8969          26 :                 return;
    8970             :         }
    8971             : 
    8972          26 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8973             : 
    8974             :         /* There shouldn't be parallel release operations on same cluster */
    8975          26 :         assert(*extent_page == ctx->extent_page);
    8976             : 
    8977          26 :         start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    8978          48 :         for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
    8979          48 :                 if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
    8980          26 :                         free_extent_page = false;
    8981          26 :                         break;
    8982             :                 }
    8983             :         }
    8984             : 
    8985          26 :         if (free_extent_page) {
    8986           0 :                 assert(ctx->extent_page != 0);
    8987           0 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8988           0 :                 ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
    8989           0 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    8990             :                                        blob_free_cluster_free_ep_cb, ctx);
    8991             :         } else {
    8992          26 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    8993             :                                        blob_free_cluster_update_ep_cb, ctx);
    8994             :         }
    8995             : }
    8996             : 
    8997             : 
    /*
     * Request that the cluster at index 'cluster_num' of the blob be freed on
     * the blobstore's md_thread.
     *
     * The request is packaged in a spdk_blob_cluster_op_ctx that records the
     * calling thread (spdk_get_thread()) and is sent to blob_free_cluster_msg().
     * ctx->cluster is left zeroed by calloc() here and is filled in by
     * blob_free_cluster_msg(). If the context cannot be allocated, cb_fn is
     * called directly with -ENOMEM.
     */
    8998             : static void
    8999          60 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
    9000             :                                struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    9001             : {
    9002             :         struct spdk_blob_cluster_op_ctx *ctx;
    9003             : 
    9004          60 :         ctx = calloc(1, sizeof(*ctx));
    9005          60 :         if (ctx == NULL) {
    9006           0 :                 cb_fn(cb_arg, -ENOMEM);
    9007           0 :                 return;
    9008             :         }
    9009             : 
    9010          60 :         ctx->thread = spdk_get_thread();
    9011          60 :         ctx->blob = blob;
    9012          60 :         ctx->cluster_num = cluster_num;
    9013          60 :         ctx->extent_page = extent_page;
    9014          60 :         ctx->page = page;
    9015          60 :         ctx->cb_fn = cb_fn;
    9016          60 :         ctx->cb_arg = cb_arg;
    9017             : 
    9018          60 :         spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
    9019             : }
    9020             : 
    9021             : /* START spdk_blob_close */
    9022             : 
    /*
     * Sequence continuation for the blob_persist() issued when closing a blob.
     * cb_arg is the blob.
     *
     * On success one open reference is dropped; when the count reaches zero the
     * blob is removed from the blobstore's open_blobids bit array and open_blobs
     * tree (unless it is a deleted blob, see below) and freed. On failure the
     * reference count is left untouched. The sequence is finished with bserrno
     * in both cases.
     */
    9023             : static void
    9024        4151 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9025             : {
    9026        4151 :         struct spdk_blob *blob = cb_arg;
    9027             : 
    9028        4151 :         if (bserrno == 0) {
    9029        4151 :                 blob->open_ref--;
    9030        4151 :                 if (blob->open_ref == 0) {
    9031             :                         /*
    9032             :                          * Blobs with active.num_pages == 0 are deleted blobs.
    9033             :                          *  these blobs are removed from the blob_store list
    9034             :                          *  when the deletion process starts - so don't try to
    9035             :                          *  remove them again.
    9036             :                          */
    9037        3398 :                         if (blob->active.num_pages > 0) {
    9038        1910 :                                 spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    9039        1910 :                                 RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    9040             :                         }
    9041        3398 :                         blob_free(blob);
    9042             :                 }
    9043             :         }
    9044             : 
    9045        4151 :         bs_sequence_finish(seq, bserrno);
    9046        4151 : }
    9047             : 
    /*
     * Completion of blob_esnap_destroy_bs_dev_channels() started by
     * spdk_blob_close() for the last reference to an esnap clone. cb_arg is the
     * close sequence.
     *
     * On error the sequence is finished with that error and the blob metadata is
     * not persisted. On success the close proceeds as for any other blob:
     * blob_persist() followed by blob_close_cpl().
     */
    9048             : static void
    9049         112 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
    9050             : {
    9051         112 :         spdk_bs_sequence_t      *seq = cb_arg;
    9052             : 
    9053         112 :         if (bserrno != 0) {
    9054           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
    9055             :                               blob->id, bserrno);
    9056           0 :                 bs_sequence_finish(seq, bserrno);
    9057           0 :                 return;
    9058             :         }
    9059             : 
    9060         112 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
    9061             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
    9062             : 
    9063             :         /* Sync metadata */
    9064         112 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9065             : }
    9066             : 
    /*
     * Close one open reference to a blob.
     *
     * Fails with -EBADF (reported through cb_fn) if the blob has no open
     * references, and with -ENOMEM if the sequence cannot be started. Otherwise
     * the blob's metadata is persisted and blob_close_cpl() drops the reference.
     * When this is the last reference to an esnap clone, its bs_dev channels are
     * destroyed first and the persist is issued from blob_close_esnap_done().
     */
    9067             : void
    9068        4151 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    9069             : {
    9070        4151 :         struct spdk_bs_cpl      cpl;
    9071             :         spdk_bs_sequence_t      *seq;
    9072             : 
    9073        4151 :         blob_verify_md_op(blob);
    9074             : 
    9075        4151 :         SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
    9076             : 
    9077        4151 :         if (blob->open_ref == 0) {
    9078           0 :                 cb_fn(cb_arg, -EBADF);
    9079           0 :                 return;
    9080             :         }
    9081             : 
    9082        4151 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    9083        4151 :         cpl.u.blob_basic.cb_fn = cb_fn;
    9084        4151 :         cpl.u.blob_basic.cb_arg = cb_arg;
    9085             : 
    9086        4151 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    9087        4151 :         if (!seq) {
    9088           0 :                 cb_fn(cb_arg, -ENOMEM);
    9089           0 :                 return;
    9090             :         }
    9091             : 
    9092        4151 :         if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
    9093         112 :                 blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
    9094         112 :                 return;
    9095             :         }
    9096             : 
    9097             :         /* Sync metadata */
    9098        4039 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9099             : }
    9100             : 
    9101             : /* END spdk_blob_close */
    9102             : 
    /*
     * Get an I/O channel for the blobstore. Returns the result of
     * spdk_get_io_channel() called with the blobstore as the io_device.
     */
    9103         229 : struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
    9104             : {
    9105         229 :         return spdk_get_io_channel(bs);
    9106             : }
    9107             : 
    /*
     * Release an I/O channel obtained from spdk_bs_alloc_io_channel().
     * blob_esnap_destroy_bs_channel() is called on the channel's context before
     * the channel reference is dropped with spdk_put_io_channel().
     */
    9108             : void
    9109         229 : spdk_bs_free_io_channel(struct spdk_io_channel *channel)
    9110             : {
    9111         229 :         blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
    9112         229 :         spdk_put_io_channel(channel);
    9113         229 : }
    9114             : 
    /*
     * Submit an unmap of [offset, offset + length) on the blob. Forwards to
     * blob_request_submit_op() with a NULL payload and SPDK_BLOB_UNMAP.
     */
    9115             : void
    9116         108 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9117             :                    uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9118             : {
    9119         108 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9120             :                                SPDK_BLOB_UNMAP);
    9121         108 : }
    9122             : 
    /*
     * Submit a write-zeroes of [offset, offset + length) on the blob. Forwards
     * to blob_request_submit_op() with a NULL payload and SPDK_BLOB_WRITE_ZEROES.
     */
    9123             : void
    9124          48 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9125             :                           uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9126             : {
    9127          48 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9128             :                                SPDK_BLOB_WRITE_ZEROES);
    9129          48 : }
    9130             : 
    /*
     * Submit a write of 'payload' to the blob at 'offset' for 'length'. Forwards
     * to blob_request_submit_op() with SPDK_BLOB_WRITE.
     */
    9131             : void
    9132       20868 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9133             :                    void *payload, uint64_t offset, uint64_t length,
    9134             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9135             : {
    9136       20868 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9137             :                                SPDK_BLOB_WRITE);
    9138       20868 : }
    9139             : 
    /*
     * Submit a read from the blob at 'offset' for 'length' into 'payload'.
     * Forwards to blob_request_submit_op() with SPDK_BLOB_READ.
     */
    9140             : void
    9141       17500 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9142             :                   void *payload, uint64_t offset, uint64_t length,
    9143             :                   spdk_blob_op_complete cb_fn, void *cb_arg)
    9144             : {
    9145       17500 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9146             :                                SPDK_BLOB_READ);
    9147       17500 : }
    9148             : 
    /*
     * Vectored write to the blob. Forwards to blob_request_submit_rw_iov() with
     * the second-to-last argument false (presumably the "read" flag, since the
     * readv variants pass true - confirm against blob_request_submit_rw_iov())
     * and no extended I/O options.
     */
    9149             : void
    9150         140 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9151             :                     struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9152             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    9153             : {
    9154         140 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
    9155         140 : }
    9156             : 
    /*
     * Vectored read from the blob. Forwards to blob_request_submit_rw_iov() with
     * the second-to-last argument true (presumably the "read" flag, since the
     * writev variants pass false - confirm against blob_request_submit_rw_iov())
     * and no extended I/O options.
     */
    9157             : void
    9158         940 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9159             :                    struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9160             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9161             : {
    9162         940 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
    9163         940 : }
    9164             : 
    /*
     * Vectored write to the blob with extended I/O options. Same forwarding as
     * spdk_blob_io_writev(), except that io_opts is passed through to
     * blob_request_submit_rw_iov() instead of NULL.
     */
    9165             : void
    9166         208 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9167             :                         struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9168             :                         spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9169             : {
    9170         208 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
    9171             :                                    io_opts);
    9172         208 : }
    9173             : 
    /*
     * Vectored read from the blob with extended I/O options. Same forwarding as
     * spdk_blob_io_readv(), except that io_opts is passed through to
     * blob_request_submit_rw_iov() instead of NULL.
     */
    9174             : void
    9175        1300 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9176             :                        struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9177             :                        spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9178             : {
    9179        1300 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
    9180             :                                    io_opts);
    9181        1300 : }
    9182             : 
    /*
     * State of one blob iteration step (spdk_bs_iter_first() /
     * spdk_bs_iter_next()). Allocated per call and freed by bs_iter_cpl() when
     * the user callback is invoked.
     */
    9183             : struct spdk_bs_iter_ctx {
                       /*
                        * Index into bs->used_blobids of the blob visited last; -1 before the
                        * first blob. bs_iter_cpl() searches for the next set bit after it.
                        */
    9184             :         int64_t page_num;
    9185             :         struct spdk_blob_store *bs;
    9186             : 
    9187             :         spdk_blob_op_with_handle_complete cb_fn;
    9188             :         void *cb_arg;
    9189             : };
    9190             : 
    /*
     * Iteration step, also used as the completion of spdk_bs_open_blob().
     *
     * bserrno == 0: a blob was opened; it is handed to the user callback and
     * the context is freed.
     *
     * bserrno != 0: either the synthetic -1 used by spdk_bs_iter_first() and
     * bs_iter_close_cpl() to start a step, or a failed open. In both cases the
     * search moves on to the next set bit in bs->used_blobids after page_num, so
     * a blob that fails to open is skipped rather than reported. When no further
     * bit is found (result at or beyond the bit array capacity) the user
     * callback gets NULL and -ENOENT; otherwise that blob is opened with this
     * function as the completion.
     */
    9191             : static void
    9192        1164 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    9193             : {
    9194        1164 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9195        1164 :         struct spdk_blob_store *bs = ctx->bs;
    9196             :         spdk_blob_id id;
    9197             : 
    9198        1164 :         if (bserrno == 0) {
    9199         444 :                 ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
    9200         444 :                 free(ctx);
    9201         444 :                 return;
    9202             :         }
    9203             : 
    9204         720 :         ctx->page_num++;
    9205         720 :         ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
    9206         720 :         if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
    9207         268 :                 ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
    9208         268 :                 free(ctx);
    9209         268 :                 return;
    9210             :         }
    9211             : 
    9212         452 :         id = bs_page_to_blobid(ctx->page_num);
    9213             : 
    9214         452 :         spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
    9215             : }
    9216             : 
    /*
     * Start iterating over the blobs of a blobstore.
     *
     * Allocates an iteration context positioned before the first blob
     * (page_num = -1) and kicks bs_iter_cpl() with a synthetic error so that it
     * searches for and opens the first used blob id. cb_fn receives the opened
     * blob, or NULL with -ENOENT when there is none, or NULL with -ENOMEM if the
     * context cannot be allocated.
     */
    9217             : void
    9218         292 : spdk_bs_iter_first(struct spdk_blob_store *bs,
    9219             :                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9220             : {
    9221             :         struct spdk_bs_iter_ctx *ctx;
    9222             : 
    9223         292 :         ctx = calloc(1, sizeof(*ctx));
    9224         292 :         if (!ctx) {
    9225           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9226           0 :                 return;
    9227             :         }
    9228             : 
    9229         292 :         ctx->page_num = -1;
    9230         292 :         ctx->bs = bs;
    9231         292 :         ctx->cb_fn = cb_fn;
    9232         292 :         ctx->cb_arg = cb_arg;
    9233             : 
    9234         292 :         bs_iter_cpl(ctx, NULL, -1);
    9235             : }
    9236             : 
    /*
     * Completion of the spdk_blob_close() issued by spdk_bs_iter_next().
     * The close status (bserrno) is not inspected; the iteration always moves on
     * by calling bs_iter_cpl() with a synthetic -1.
     */
    9237             : static void
    9238         420 : bs_iter_close_cpl(void *cb_arg, int bserrno)
    9239             : {
    9240         420 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9241             : 
    9242         420 :         bs_iter_cpl(ctx, NULL, -1);
    9243         420 : }
    9244             : 
    /*
     * Advance the iteration past 'blob' (asserted non-NULL).
     *
     * Records the page number of the current blob's id, closes the blob, and
     * continues from bs_iter_close_cpl(), which opens the next used blob id.
     * cb_fn receives the next blob, or NULL with -ENOENT at the end, or NULL
     * with -ENOMEM if the context cannot be allocated (in which case the blob is
     * not closed).
     */
    9245             : void
    9246         420 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
    9247             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9248             : {
    9249             :         struct spdk_bs_iter_ctx *ctx;
    9250             : 
    9251         420 :         assert(blob != NULL);
    9252             : 
    9253         420 :         ctx = calloc(1, sizeof(*ctx));
    9254         420 :         if (!ctx) {
    9255           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9256           0 :                 return;
    9257             :         }
    9258             : 
    9259         420 :         ctx->page_num = bs_blobid_to_page(blob->id);
    9260         420 :         ctx->bs = bs;
    9261         420 :         ctx->cb_fn = cb_fn;
    9262         420 :         ctx->cb_arg = cb_arg;
    9263             : 
    9264             :         /* Close the existing blob */
    9265         420 :         spdk_blob_close(blob, bs_iter_close_cpl, ctx);
    9266             : }
    9267             : 
    9268             : static int
    9269         943 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9270             :                uint16_t value_len, bool internal)
    9271             : {
    9272             :         struct spdk_xattr_tailq *xattrs;
    9273             :         struct spdk_xattr       *xattr;
    9274             :         size_t                  desc_size;
    9275             :         void                    *tmp;
    9276             : 
    9277         943 :         blob_verify_md_op(blob);
    9278             : 
    9279         943 :         if (blob->md_ro) {
    9280           4 :                 return -EPERM;
    9281             :         }
    9282             : 
    9283         939 :         desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
    9284         939 :         if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
    9285           4 :                 SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
    9286             :                               desc_size, SPDK_BS_MAX_DESC_SIZE);
    9287           4 :                 return -ENOMEM;
    9288             :         }
    9289             : 
    9290         935 :         if (internal) {
    9291         724 :                 xattrs = &blob->xattrs_internal;
    9292         724 :                 blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
    9293             :         } else {
    9294         211 :                 xattrs = &blob->xattrs;
    9295             :         }
    9296             : 
    9297        1158 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9298         332 :                 if (!strcmp(name, xattr->name)) {
    9299         109 :                         tmp = malloc(value_len);
    9300         109 :                         if (!tmp) {
    9301           0 :                                 return -ENOMEM;
    9302             :                         }
    9303             : 
    9304         109 :                         free(xattr->value);
    9305         109 :                         xattr->value_len = value_len;
    9306         109 :                         xattr->value = tmp;
    9307         109 :                         memcpy(xattr->value, value, value_len);
    9308             : 
    9309         109 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9310             : 
    9311         109 :                         return 0;
    9312             :                 }
    9313             :         }
    9314             : 
    9315         826 :         xattr = calloc(1, sizeof(*xattr));
    9316         826 :         if (!xattr) {
    9317           0 :                 return -ENOMEM;
    9318             :         }
    9319             : 
    9320         826 :         xattr->name = strdup(name);
    9321         826 :         if (!xattr->name) {
    9322           0 :                 free(xattr);
    9323           0 :                 return -ENOMEM;
    9324             :         }
    9325             : 
    9326         826 :         xattr->value_len = value_len;
    9327         826 :         xattr->value = malloc(value_len);
    9328         826 :         if (!xattr->value) {
    9329           0 :                 free(xattr->name);
    9330           0 :                 free(xattr);
    9331           0 :                 return -ENOMEM;
    9332             :         }
    9333         826 :         memcpy(xattr->value, value, value_len);
    9334         826 :         TAILQ_INSERT_TAIL(xattrs, xattr, link);
    9335             : 
    9336         826 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    9337             : 
    9338         826 :         return 0;
    9339             : }
    9340             : 
    9341             : int
    9342         183 : spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9343             :                     uint16_t value_len)
    9344             : {
    9345         183 :         return blob_set_xattr(blob, name, value, value_len, false);
    9346             : }
    9347             : 
    9348             : static int
    9349         404 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
    9350             : {
    9351             :         struct spdk_xattr_tailq *xattrs;
    9352             :         struct spdk_xattr       *xattr;
    9353             : 
    9354         404 :         blob_verify_md_op(blob);
    9355             : 
    9356         404 :         if (blob->md_ro) {
    9357           4 :                 return -EPERM;
    9358             :         }
    9359         400 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9360             : 
    9361         412 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9362         360 :                 if (!strcmp(name, xattr->name)) {
    9363         348 :                         TAILQ_REMOVE(xattrs, xattr, link);
    9364         348 :                         free(xattr->value);
    9365         348 :                         free(xattr->name);
    9366         348 :                         free(xattr);
    9367             : 
    9368         348 :                         if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
    9369         240 :                                 blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
    9370             :                         }
    9371         348 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9372             : 
    9373         348 :                         return 0;
    9374             :                 }
    9375             :         }
    9376             : 
    9377          52 :         return -ENOENT;
    9378             : }
    9379             : 
    9380             : int
    9381          36 : spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
    9382             : {
    9383          36 :         return blob_remove_xattr(blob, name, false);
    9384             : }
    9385             : 
    9386             : static int
    9387        2268 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9388             :                      const void **value, size_t *value_len, bool internal)
    9389             : {
    9390             :         struct spdk_xattr       *xattr;
    9391             :         struct spdk_xattr_tailq *xattrs;
    9392             : 
    9393        2268 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9394             : 
    9395        2890 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9396        1372 :                 if (!strcmp(name, xattr->name)) {
    9397         750 :                         *value = xattr->value;
    9398         750 :                         *value_len = xattr->value_len;
    9399         750 :                         return 0;
    9400             :                 }
    9401             :         }
    9402        1518 :         return -ENOENT;
    9403             : }
    9404             : 
    9405             : int
    9406         154 : spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9407             :                           const void **value, size_t *value_len)
    9408             : {
    9409         154 :         blob_verify_md_op(blob);
    9410             : 
    9411         154 :         return blob_get_xattr_value(blob, name, value, value_len, false);
    9412             : }
    9413             : 
    9414             : struct spdk_xattr_names {
    9415             :         uint32_t        count;
    9416             :         const char      *names[0];
    9417             : };
    9418             : 
    9419             : static int
    9420           4 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
    9421             : {
    9422             :         struct spdk_xattr       *xattr;
    9423           4 :         int                     count = 0;
    9424             : 
    9425          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9426           8 :                 count++;
    9427             :         }
    9428             : 
    9429           4 :         *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
    9430           4 :         if (*names == NULL) {
    9431           0 :                 return -ENOMEM;
    9432             :         }
    9433             : 
    9434          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9435           8 :                 (*names)->names[(*names)->count++] = xattr->name;
    9436             :         }
    9437             : 
    9438           4 :         return 0;
    9439             : }
    9440             : 
    9441             : int
    9442           4 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
    9443             : {
    9444           4 :         blob_verify_md_op(blob);
    9445             : 
    9446           4 :         return blob_get_xattr_names(&blob->xattrs, names);
    9447             : }
    9448             : 
    9449             : uint32_t
    9450           4 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
    9451             : {
    9452           4 :         assert(names != NULL);
    9453             : 
    9454           4 :         return names->count;
    9455             : }
    9456             : 
    9457             : const char *
    9458           8 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
    9459             : {
    9460           8 :         if (index >= names->count) {
    9461           0 :                 return NULL;
    9462             :         }
    9463             : 
    9464           8 :         return names->names[index];
    9465             : }
    9466             : 
    9467             : void
    9468           4 : spdk_xattr_names_free(struct spdk_xattr_names *names)
    9469             : {
    9470           4 :         free(names);
    9471           4 : }
    9472             : 
    9473             : struct spdk_bs_type
    9474           2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
    9475             : {
    9476           2 :         return bs->bstype;
    9477             : }
    9478             : 
    9479             : void
    9480           0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
    9481             : {
    9482           0 :         memcpy(&bs->bstype, &bstype, sizeof(bstype));
    9483           0 : }
    9484             : 
    9485             : bool
    9486          48 : spdk_blob_is_read_only(struct spdk_blob *blob)
    9487             : {
    9488          48 :         assert(blob != NULL);
    9489          48 :         return (blob->data_ro || blob->md_ro);
    9490             : }
    9491             : 
    9492             : bool
    9493          52 : spdk_blob_is_snapshot(struct spdk_blob *blob)
    9494             : {
    9495             :         struct spdk_blob_list *snapshot_entry;
    9496             : 
    9497          52 :         assert(blob != NULL);
    9498             : 
    9499          52 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    9500          52 :         if (snapshot_entry == NULL) {
    9501          28 :                 return false;
    9502             :         }
    9503             : 
    9504          24 :         return true;
    9505             : }
    9506             : 
    9507             : bool
    9508          68 : spdk_blob_is_clone(struct spdk_blob *blob)
    9509             : {
    9510          68 :         assert(blob != NULL);
    9511             : 
    9512          68 :         if (blob->parent_id != SPDK_BLOBID_INVALID &&
    9513          44 :             blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    9514          40 :                 assert(spdk_blob_is_thin_provisioned(blob));
    9515          40 :                 return true;
    9516             :         }
    9517             : 
    9518          28 :         return false;
    9519             : }
    9520             : 
    9521             : bool
    9522       36462 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
    9523             : {
    9524       36462 :         assert(blob != NULL);
    9525       36462 :         return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
    9526             : }
    9527             : 
    9528             : bool
    9529       40888 : spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
    9530             : {
    9531       40888 :         return blob_is_esnap_clone(blob);
    9532             : }
    9533             : 
    9534             : static void
    9535        3426 : blob_update_clear_method(struct spdk_blob *blob)
    9536             : {
    9537             :         enum blob_clear_method stored_cm;
    9538             : 
    9539        3426 :         assert(blob != NULL);
    9540             : 
    9541             :         /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
    9542             :          * in metadata previously.  If something other than the default was
    9543             :          * specified, ignore stored value and used what was passed in.
    9544             :          */
    9545        3426 :         stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
    9546             : 
    9547        3426 :         if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
    9548        3426 :                 blob->clear_method = stored_cm;
    9549           0 :         } else if (blob->clear_method != stored_cm) {
    9550           0 :                 SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
    9551             :                              blob->clear_method, stored_cm);
    9552             :         }
    9553        3426 : }
    9554             : 
    9555             : spdk_blob_id
    9556         258 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
    9557             : {
    9558         258 :         struct spdk_blob_list *snapshot_entry = NULL;
    9559         258 :         struct spdk_blob_list *clone_entry = NULL;
    9560             : 
    9561         494 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
    9562         732 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9563         496 :                         if (clone_entry->id == blob_id) {
    9564         168 :                                 return snapshot_entry->id;
    9565             :                         }
    9566             :                 }
    9567             :         }
    9568             : 
    9569          90 :         return SPDK_BLOBID_INVALID;
    9570             : }
    9571             : 
    9572             : int
    9573         196 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
    9574             :                      size_t *count)
    9575             : {
    9576             :         struct spdk_blob_list *snapshot_entry, *clone_entry;
    9577             :         size_t n;
    9578             : 
    9579         196 :         snapshot_entry = bs_get_snapshot_entry(bs, blobid);
    9580         196 :         if (snapshot_entry == NULL) {
    9581          28 :                 *count = 0;
    9582          28 :                 return 0;
    9583             :         }
    9584             : 
    9585         168 :         if (ids == NULL || *count < snapshot_entry->clone_count) {
    9586           8 :                 *count = snapshot_entry->clone_count;
    9587           8 :                 return -ENOMEM;
    9588             :         }
    9589         160 :         *count = snapshot_entry->clone_count;
    9590             : 
    9591         160 :         n = 0;
    9592         340 :         TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9593         180 :                 ids[n++] = clone_entry->id;
    9594             :         }
    9595             : 
    9596         160 :         return 0;
    9597             : }
    9598             : 
    9599             : static void
    9600           4 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
    9601             : {
    9602             :         int rc;
    9603             : 
    9604           4 :         if (ctx->super->size == 0) {
    9605           0 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9606             :         }
    9607             : 
    9608           4 :         if (ctx->super->io_unit_size == 0) {
    9609           0 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    9610             :         }
    9611             : 
    9612             :         /* Parse the super block */
    9613           4 :         ctx->bs->clean = 1;
    9614           4 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    9615           4 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    9616           4 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    9617           4 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    9618           4 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    9619             :         }
    9620           4 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    9621           4 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    9622           4 :         if (rc < 0) {
    9623           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9624           0 :                 return;
    9625             :         }
    9626           4 :         ctx->bs->md_start = ctx->super->md_start;
    9627           4 :         ctx->bs->md_len = ctx->super->md_len;
    9628           4 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    9629           4 :         if (rc < 0) {
    9630           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9631           0 :                 return;
    9632             :         }
    9633             : 
    9634           8 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    9635           4 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    9636           4 :         ctx->bs->super_blob = ctx->super->super_blob;
    9637           4 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    9638             : 
    9639           4 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
    9640           0 :                 SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
    9641           0 :                 bs_load_ctx_fail(ctx, -EIO);
    9642           0 :                 return;
    9643             :         } else {
    9644           4 :                 bs_load_read_used_pages(ctx);
    9645             :         }
    9646             : }
    9647             : 
    9648             : static void
    9649           4 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9650             : {
    9651           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9652             : 
    9653           4 :         if (bserrno != 0) {
    9654           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9655           0 :                 return;
    9656             :         }
    9657           4 :         bs_load_grow_continue(ctx);
    9658             : }
    9659             : 
    9660             : static void
    9661           4 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9662             : {
    9663           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9664             : 
    9665           4 :         if (bserrno != 0) {
    9666           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9667           0 :                 return;
    9668             :         }
    9669             : 
    9670           4 :         spdk_free(ctx->mask);
    9671             : 
    9672           4 :         bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    9673           4 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    9674             :                               bs_load_grow_super_write_cpl, ctx);
    9675             : }
    9676             : 
    9677             : static void
    9678           4 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9679             : {
    9680           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9681             :         uint64_t                lba, lba_count;
    9682             :         uint64_t                dev_size;
    9683             :         uint64_t                total_clusters;
    9684             : 
    9685           4 :         if (bserrno != 0) {
    9686           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9687           0 :                 return;
    9688             :         }
    9689             : 
    9690             :         /* The type must be correct */
    9691           4 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    9692             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    9693           4 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    9694             :                                              struct spdk_blob_md_page) * 8));
    9695           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9696           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9697           4 :         ctx->mask->length = total_clusters;
    9698             : 
    9699           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9700           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9701           4 :         bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
    9702             :                               bs_load_grow_used_clusters_write_cpl, ctx);
    9703             : }
    9704             : 
    9705             : static void
    9706           4 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
    9707             : {
    9708             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9709             :         uint64_t lba, lba_count, mask_size;
    9710             : 
    9711           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9712           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9713           4 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9714           4 :                                 spdk_divide_round_up(total_clusters, 8),
    9715             :                                 SPDK_BS_PAGE_SIZE);
    9716           4 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9717             :         /* No necessary to grow or no space to grow */
    9718           4 :         if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
    9719           0 :                 SPDK_DEBUGLOG(blob, "No grow\n");
    9720           0 :                 bs_load_grow_continue(ctx);
    9721           0 :                 return;
    9722             :         }
    9723             : 
    9724           4 :         SPDK_DEBUGLOG(blob, "Resize blobstore\n");
    9725             : 
    9726           4 :         ctx->super->size = dev_size;
    9727           4 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9728           4 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    9729             : 
    9730           4 :         mask_size = used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    9731           4 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    9732             :                                  SPDK_MALLOC_DMA);
    9733           4 :         if (!ctx->mask) {
    9734           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9735           0 :                 return;
    9736             :         }
    9737           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9738           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9739           4 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    9740             :                              bs_load_grow_used_clusters_read_cpl, ctx);
    9741             : }
    9742             : 
    9743             : static void
    9744           4 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9745             : {
    9746           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9747             :         int rc;
    9748             : 
    9749           4 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9750           4 :         if (rc != 0) {
    9751           0 :                 bs_load_ctx_fail(ctx, rc);
    9752           0 :                 return;
    9753             :         }
    9754             : 
    9755           4 :         bs_load_try_to_grow(ctx);
    9756             : }
    9757             : 
    9758             : struct spdk_bs_grow_ctx {
    9759             :         struct spdk_blob_store          *bs;
    9760             :         struct spdk_bs_super_block      *super;
    9761             : 
    9762             :         struct spdk_bit_pool            *new_used_clusters;
    9763             :         struct spdk_bs_md_mask          *new_used_clusters_mask;
    9764             : 
    9765             :         spdk_bs_sequence_t              *seq;
    9766             : };
    9767             : 
    9768             : static void
    9769          32 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
    9770             : {
    9771          32 :         if (bserrno != 0) {
    9772           8 :                 spdk_bit_pool_free(&ctx->new_used_clusters);
    9773             :         }
    9774             : 
    9775          32 :         bs_sequence_finish(ctx->seq, bserrno);
    9776          32 :         free(ctx->new_used_clusters_mask);
    9777          32 :         spdk_free(ctx->super);
    9778          32 :         free(ctx);
    9779          32 : }
    9780             : 
    9781             : static void
    9782           8 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9783             : {
    9784           8 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9785           8 :         struct spdk_blob_store *bs = ctx->bs;
    9786             :         uint64_t total_clusters;
    9787             : 
    9788           8 :         if (bserrno != 0) {
    9789           0 :                 bs_grow_live_done(ctx, bserrno);
    9790           0 :                 return;
    9791             :         }
    9792             : 
    9793             :         /*
    9794             :          * Blobstore is not clean until unload, for now only the super block is up to date.
    9795             :          * This is similar to state right after blobstore init, when bs_write_used_md() didn't
    9796             :          * yet execute.
    9797             :          * When cleanly unloaded, the used md pages will be written out.
    9798             :          * In case of unclean shutdown, loading blobstore will go through recovery path correctly
    9799             :          * filling out the used_clusters with new size and writing it out.
    9800             :          */
    9801           8 :         bs->clean = 0;
    9802             : 
    9803             :         /* Reverting the super->size past this point is complex, avoid any error paths
    9804             :          * that require to do so. */
    9805           8 :         spdk_spin_lock(&bs->used_lock);
    9806             : 
    9807           8 :         total_clusters = ctx->super->size / ctx->super->cluster_size;
    9808             : 
    9809           8 :         assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
    9810           8 :         spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
    9811             : 
    9812           8 :         assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
    9813           8 :         spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
    9814             : 
    9815           8 :         spdk_bit_pool_free(&bs->used_clusters);
    9816           8 :         bs->used_clusters = ctx->new_used_clusters;
    9817             : 
    9818           8 :         bs->total_clusters = total_clusters;
    9819          16 :         bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
    9820           8 :                                           bs->md_start + bs->md_len, bs->pages_per_cluster);
    9821             : 
    9822           8 :         bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
    9823           8 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    9824           8 :         spdk_spin_unlock(&bs->used_lock);
    9825             : 
    9826           8 :         bs_grow_live_done(ctx, 0);
    9827             : }
    9828             : 
    9829             : static void
    9830          32 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9831             : {
    9832          32 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9833             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9834             :         int rc;
    9835             : 
    9836          32 :         if (bserrno != 0) {
    9837           0 :                 bs_grow_live_done(ctx, bserrno);
    9838           0 :                 return;
    9839             :         }
    9840             : 
    9841          32 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9842          32 :         if (rc != 0) {
    9843           4 :                 bs_grow_live_done(ctx, rc);
    9844           4 :                 return;
    9845             :         }
    9846             : 
    9847          28 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9848          28 :         total_clusters = dev_size / ctx->super->cluster_size;
    9849          28 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9850          28 :                                 spdk_divide_round_up(total_clusters, 8),
    9851             :                                 SPDK_BS_PAGE_SIZE);
    9852          28 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9853             :         /* Only checking dev_size. Since it can change, but total_clusters remain the same. */
    9854          28 :         if (dev_size == ctx->super->size) {
    9855          16 :                 SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
    9856          16 :                 bs_grow_live_done(ctx, 0);
    9857          16 :                 return;
    9858             :         }
    9859             :         /*
    9860             :          * Blobstore cannot be shrunk, so check before if:
    9861             :          * - new size of the device is smaller than size in super_block
    9862             :          * - new total number of clusters is smaller than used_clusters bit_pool
    9863             :          * - there is enough space in metadata for used_cluster_mask to be written out
    9864             :          */
    9865          12 :         if (dev_size < ctx->super->size ||
    9866          12 :             total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
    9867             :             used_cluster_mask_len > max_used_cluster_mask) {
    9868           4 :                 SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
    9869           4 :                 bs_grow_live_done(ctx, -ENOSPC);
    9870           4 :                 return;
    9871             :         }
    9872             : 
    9873           8 :         SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
    9874             : 
    9875           8 :         ctx->new_used_clusters_mask = calloc(1, total_clusters);
    9876           8 :         if (!ctx->new_used_clusters_mask) {
    9877           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9878           0 :                 return;
    9879             :         }
    9880           8 :         ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
    9881           8 :         if (!ctx->new_used_clusters) {
    9882           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9883           0 :                 return;
    9884             :         }
    9885             : 
    9886           8 :         ctx->super->clean = 0;
    9887           8 :         ctx->super->size = dev_size;
    9888           8 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9889           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
    9890             : }
    9891             : 
    9892             : void
    9893          32 : spdk_bs_grow_live(struct spdk_blob_store *bs,
    9894             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    9895             : {
    9896          32 :         struct spdk_bs_cpl      cpl;
    9897             :         struct spdk_bs_grow_ctx *ctx;
    9898             : 
    9899          32 :         assert(spdk_get_thread() == bs->md_thread);
    9900             : 
    9901          32 :         SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
    9902             : 
    9903          32 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    9904          32 :         cpl.u.bs_basic.cb_fn = cb_fn;
    9905          32 :         cpl.u.bs_basic.cb_arg = cb_arg;
    9906             : 
    9907          32 :         ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
    9908          32 :         if (!ctx) {
    9909           0 :                 cb_fn(cb_arg, -ENOMEM);
    9910           0 :                 return;
    9911             :         }
    9912          32 :         ctx->bs = bs;
    9913             : 
    9914          32 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    9915             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    9916          32 :         if (!ctx->super) {
    9917           0 :                 free(ctx);
    9918           0 :                 cb_fn(cb_arg, -ENOMEM);
    9919           0 :                 return;
    9920             :         }
    9921             : 
    9922          32 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9923          32 :         if (!ctx->seq) {
    9924           0 :                 spdk_free(ctx->super);
    9925           0 :                 free(ctx);
    9926           0 :                 cb_fn(cb_arg, -ENOMEM);
    9927           0 :                 return;
    9928             :         }
    9929             : 
    9930             :         /* Read the super block */
    9931          32 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    9932          32 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    9933             :                              bs_grow_live_load_super_cpl, ctx);
    9934             : }
    9935             : 
    9936             : void
    9937           4 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    9938             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    9939             : {
    9940           4 :         struct spdk_blob_store  *bs;
    9941           4 :         struct spdk_bs_cpl      cpl;
    9942           4 :         struct spdk_bs_load_ctx *ctx;
    9943           4 :         struct spdk_bs_opts     opts = {};
    9944             :         int err;
    9945             : 
    9946           4 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    9947             : 
    9948           4 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    9949           0 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    9950           0 :                 dev->destroy(dev);
    9951           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    9952           0 :                 return;
    9953             :         }
    9954             : 
    9955           4 :         spdk_bs_opts_init(&opts, sizeof(opts));
    9956           4 :         if (o) {
    9957           4 :                 if (bs_opts_copy(o, &opts)) {
    9958           0 :                         return;
    9959             :                 }
    9960             :         }
    9961             : 
    9962           4 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    9963           0 :                 dev->destroy(dev);
    9964           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    9965           0 :                 return;
    9966             :         }
    9967             : 
    9968           4 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    9969           4 :         if (err) {
    9970           0 :                 dev->destroy(dev);
    9971           0 :                 cb_fn(cb_arg, NULL, err);
    9972           0 :                 return;
    9973             :         }
    9974             : 
    9975           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    9976           4 :         cpl.u.bs_handle.cb_fn = cb_fn;
    9977           4 :         cpl.u.bs_handle.cb_arg = cb_arg;
    9978           4 :         cpl.u.bs_handle.bs = bs;
    9979             : 
    9980           4 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9981           4 :         if (!ctx->seq) {
    9982           0 :                 spdk_free(ctx->super);
    9983           0 :                 free(ctx);
    9984           0 :                 bs_free(bs);
    9985           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9986           0 :                 return;
    9987             :         }
    9988             : 
    9989             :         /* Read the super block */
    9990           4 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    9991           4 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    9992             :                              bs_grow_load_super_cpl, ctx);
    9993             : }
    9994             : 
    9995             : int
    9996          24 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
    9997             : {
    9998          24 :         if (!blob_is_esnap_clone(blob)) {
    9999          12 :                 return -EINVAL;
   10000             :         }
   10001             : 
   10002          12 :         return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
   10003             : }
   10004             : 
   10005             : struct spdk_io_channel *
   10006        8840 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
   10007             : {
   10008        8840 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(ch);
   10009        8840 :         struct spdk_bs_dev              *bs_dev = blob->back_bs_dev;
   10010        8840 :         struct blob_esnap_channel       find = {};
   10011             :         struct blob_esnap_channel       *esnap_channel, *existing;
   10012             : 
   10013        8840 :         find.blob_id = blob->id;
   10014        8840 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10015        8840 :         if (spdk_likely(esnap_channel != NULL)) {
   10016        8796 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
   10017             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10018        8796 :                 return esnap_channel->channel;
   10019             :         }
   10020             : 
   10021          44 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
   10022             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
   10023             : 
   10024          44 :         esnap_channel = calloc(1, sizeof(*esnap_channel));
   10025          44 :         if (esnap_channel == NULL) {
   10026           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
   10027             :                                find.blob_id);
   10028           0 :                 return NULL;
   10029             :         }
   10030          44 :         esnap_channel->channel = bs_dev->create_channel(bs_dev);
   10031          44 :         if (esnap_channel->channel == NULL) {
   10032           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
   10033           0 :                 free(esnap_channel);
   10034           0 :                 return NULL;
   10035             :         }
   10036          44 :         esnap_channel->blob_id = find.blob_id;
   10037          44 :         existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10038          44 :         if (spdk_unlikely(existing != NULL)) {
   10039             :                 /*
   10040             :                  * This should be unreachable: all modifications to this tree happen on this thread.
   10041             :                  */
   10042           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
   10043           0 :                 assert(false);
   10044             : 
   10045             :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10046             :                 free(esnap_channel);
   10047             : 
   10048             :                 return existing->channel;
   10049             :         }
   10050             : 
   10051          44 :         return esnap_channel->channel;
   10052             : }
   10053             : 
   10054             : static int
   10055        8816 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
   10056             : {
   10057        8816 :         return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
   10058             : }
   10059             : 
   10060             : struct blob_esnap_destroy_ctx {
   10061             :         spdk_blob_op_with_handle_complete       cb_fn;
   10062             :         void                                    *cb_arg;
   10063             :         struct spdk_blob                        *blob;
   10064             :         struct spdk_bs_dev                      *back_bs_dev;
   10065             :         bool                                    abort_io;
   10066             : };
   10067             : 
   10068             : static void
   10069         136 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
   10070             : {
   10071         136 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10072         136 :         struct spdk_blob                *blob = ctx->blob;
   10073         136 :         struct spdk_blob_store          *bs = blob->bs;
   10074             : 
   10075         136 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
   10076             :                       blob->id);
   10077             : 
   10078         136 :         if (ctx->cb_fn != NULL) {
   10079         124 :                 ctx->cb_fn(ctx->cb_arg, blob, status);
   10080             :         }
   10081         136 :         free(ctx);
   10082             : 
   10083         136 :         bs->esnap_channels_unloading--;
   10084         136 :         if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
   10085           4 :                 spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
   10086             :         }
   10087         136 : }
   10088             : 
   10089             : static void
   10090         144 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
   10091             : {
   10092         144 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10093         144 :         struct spdk_blob                *blob = ctx->blob;
   10094         144 :         struct spdk_bs_dev              *bs_dev = ctx->back_bs_dev;
   10095         144 :         struct spdk_io_channel          *channel = spdk_io_channel_iter_get_channel(i);
   10096         144 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(channel);
   10097             :         struct blob_esnap_channel       *esnap_channel;
   10098         144 :         struct blob_esnap_channel       find = {};
   10099             : 
   10100         144 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
   10101             : 
   10102         144 :         find.blob_id = blob->id;
   10103         144 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10104         144 :         if (esnap_channel != NULL) {
   10105          12 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
   10106             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10107          12 :                 RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10108             : 
   10109          12 :                 if (ctx->abort_io) {
   10110             :                         spdk_bs_user_op_t *op, *tmp;
   10111             : 
   10112           8 :                         TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
   10113           0 :                                 if (op->back_channel == esnap_channel->channel) {
   10114           0 :                                         TAILQ_REMOVE(&bs_channel->queued_io, op, link);
   10115           0 :                                         bs_user_op_abort(op, -EIO);
   10116             :                                 }
   10117             :                         }
   10118             :                 }
   10119             : 
   10120          12 :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10121          12 :                 free(esnap_channel);
   10122             :         }
   10123             : 
   10124         144 :         spdk_for_each_channel_continue(i, 0);
   10125         144 : }
   10126             : 
   10127             : /*
   10128             :  * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
   10129             :  * used when closing an esnap clone blob and after decoupling from the parent.
   10130             :  */
   10131             : static void
   10132         480 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
   10133             :                                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
   10134             : {
   10135             :         struct blob_esnap_destroy_ctx   *ctx;
   10136             : 
   10137         480 :         if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
   10138         344 :                 if (cb_fn != NULL) {
   10139         344 :                         cb_fn(cb_arg, blob, 0);
   10140             :                 }
   10141         344 :                 return;
   10142             :         }
   10143             : 
   10144         136 :         ctx = calloc(1, sizeof(*ctx));
   10145         136 :         if (ctx == NULL) {
   10146           0 :                 if (cb_fn != NULL) {
   10147           0 :                         cb_fn(cb_arg, blob, -ENOMEM);
   10148             :                 }
   10149           0 :                 return;
   10150             :         }
   10151         136 :         ctx->cb_fn = cb_fn;
   10152         136 :         ctx->cb_arg = cb_arg;
   10153         136 :         ctx->blob = blob;
   10154         136 :         ctx->back_bs_dev = blob->back_bs_dev;
   10155         136 :         ctx->abort_io = abort_io;
   10156             : 
   10157         136 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
   10158             :                       blob->id);
   10159             : 
   10160         136 :         blob->bs->esnap_channels_unloading++;
   10161         136 :         spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
   10162             :                               blob_esnap_destroy_channels_done);
   10163             : }
   10164             : 
   10165             : /*
   10166             :  * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
   10167             :  * bs_channel is destroyed.
   10168             :  */
   10169             : static void
   10170        1025 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
   10171             : {
   10172             :         struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
   10173             : 
   10174        1025 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
   10175             : 
   10176        1025 :         SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
   10177             :                       spdk_thread_get_name(spdk_get_thread()));
   10178        1057 :         RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
   10179             :                         esnap_channel_tmp) {
   10180          32 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
   10181             :                               ": destroying one channel in thread %s\n",
   10182             :                               esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
   10183          32 :                 RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
   10184          32 :                 spdk_put_io_channel(esnap_channel->channel);
   10185          32 :                 free(esnap_channel);
   10186             :         }
   10187        1025 :         SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
   10188             :                       spdk_thread_get_name(spdk_get_thread()));
   10189        1025 : }
   10190             : 
   10191             : static void
   10192          28 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
   10193             : {
   10194          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10195             : 
   10196          28 :         if (bserrno != 0) {
   10197             :                 /* Even though the unfreeze failed, the update may have succeed. */
   10198           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
   10199             :                             bserrno);
   10200             :         }
   10201          28 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
   10202          28 :         free(ctx);
   10203          28 : }
   10204             : 
   10205             : static void
   10206          28 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
   10207             : {
   10208          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10209             :         int rc;
   10210             : 
   10211          28 :         if (bserrno != 0) {
   10212           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
   10213             :                             blob->id, bserrno);
   10214           0 :                 ctx->bserrno = bserrno;
   10215           0 :                 blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10216           0 :                 return;
   10217             :         }
   10218             : 
   10219          28 :         if (blob->back_bs_dev != NULL) {
   10220          28 :                 blob->back_bs_dev->destroy(blob->back_bs_dev);
   10221          28 :                 blob->back_bs_dev = NULL;
   10222             :         }
   10223             : 
   10224          28 :         if (ctx->parent_refs_cb_fn) {
   10225          20 :                 rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
   10226          20 :                 if (rc != 0) {
   10227           0 :                         ctx->bserrno = rc;
   10228           0 :                         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10229           0 :                         return;
   10230             :                 }
   10231             :         }
   10232             : 
   10233          28 :         SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
   10234          28 :         blob->back_bs_dev = ctx->back_bs_dev;
   10235          28 :         ctx->bserrno = 0;
   10236             : 
   10237          28 :         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10238             : }
   10239             : 
   10240             : static void
   10241          28 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
   10242             : {
   10243          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10244          28 :         struct spdk_blob        *blob = ctx->blob;
   10245             : 
   10246          28 :         if (bserrno != 0) {
   10247           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
   10248             :                             bserrno);
   10249           0 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
   10250           0 :                 free(ctx);
   10251           0 :                 return;
   10252             :         }
   10253             : 
   10254             :         /*
   10255             :          * This does not prevent future reads from the esnap device because any future IO will
   10256             :          * lazily create a new esnap IO channel.
   10257             :          */
   10258          28 :         blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
   10259             : }
   10260             : 
   10261             : void
   10262           8 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
   10263             :                            spdk_blob_op_complete cb_fn, void *cb_arg)
   10264             : {
   10265           8 :         if (!blob_is_esnap_clone(blob)) {
   10266           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10267           0 :                 cb_fn(cb_arg, -EINVAL);
   10268           0 :                 return;
   10269             :         }
   10270             : 
   10271           8 :         blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
   10272             : }
   10273             : 
   10274             : struct spdk_bs_dev *
   10275           4 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
   10276             : {
   10277           4 :         if (!blob_is_esnap_clone(blob)) {
   10278           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10279           0 :                 return NULL;
   10280             :         }
   10281             : 
   10282           4 :         return blob->back_bs_dev;
   10283             : }
   10284             : 
   10285             : bool
   10286          28 : spdk_blob_is_degraded(const struct spdk_blob *blob)
   10287             : {
   10288          28 :         if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
   10289           4 :                 return true;
   10290             :         }
   10291          24 :         if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
   10292          12 :                 return false;
   10293             :         }
   10294             : 
   10295          12 :         return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
   10296             : }
   10297             : 
                       /* Log components named by the SPDK_DEBUGLOG(blob, ...) and SPDK_DEBUGLOG(blob_esnap, ...) calls above. */
   10298           3 : SPDK_LOG_REGISTER_COMPONENT(blob)
   10299           3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap)

Generated by: LCOV version 1.15