LCOV - code coverage report
Current view: top level - lib/blob - blobstore.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 4086 5117 79.9 %
Date: 2024-07-12 06:35:58 Functions: 338 360 93.9 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/blob.h"
      10             : #include "spdk/crc32.h"
      11             : #include "spdk/env.h"
      12             : #include "spdk/queue.h"
      13             : #include "spdk/thread.h"
      14             : #include "spdk/bit_array.h"
      15             : #include "spdk/bit_pool.h"
      16             : #include "spdk/likely.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : #include "spdk/trace.h"
      20             : 
      21             : #include "spdk_internal/assert.h"
      22             : #include "spdk_internal/trace_defs.h"
      23             : #include "spdk/log.h"
      24             : 
      25             : #include "blobstore.h"
      26             : 
      27             : #define BLOB_CRC32C_INITIAL    0xffffffffUL
      28             : 
      29             : static int bs_register_md_thread(struct spdk_blob_store *bs);
      30             : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
      31             : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
      32             : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      33             :                 uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
      34             :                 spdk_blob_op_complete cb_fn, void *cb_arg);
      35             : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      36             :                 uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      37             : 
      38             : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
      39             :                           uint16_t value_len, bool internal);
      40             : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
      41             :                                 const void **value, size_t *value_len, bool internal);
      42             : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
      43             : 
      44             : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
      45             :                                    struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      46             : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
      47             : 
      48             : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
      49             : 
      50             : /*
      51             :  * External snapshots require a channel per thread per esnap bdev.  The tree
      52             :  * is populated lazily as blob IOs are handled by the back_bs_dev. When this
      53             :  * channel is destroyed, all the channels in the tree are destroyed.
      54             :  */
      55             : 
      56             : struct blob_esnap_channel {
      57             :         RB_ENTRY(blob_esnap_channel)    node;
      58             :         spdk_blob_id                    blob_id;
      59             :         struct spdk_io_channel          *channel;
      60             : };
      61             : 
      62             : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
      63             : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
      64             :                 spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
      65             : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
      66             : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
      67       10225 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
      68             : 
      69             : static inline bool
      70       49534 : blob_is_esnap_clone(const struct spdk_blob *blob)
      71             : {
      72       49534 :         assert(blob != NULL);
      73       49534 :         return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
      74             : }
      75             : 
      76             : static int
      77        2273 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
      78             : {
      79        2273 :         assert(blob1 != NULL && blob2 != NULL);
      80        2273 :         return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
      81             : }
      82             : 
      83       15289 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
      84             : 
      85             : static void
      86       36883 : blob_verify_md_op(struct spdk_blob *blob)
      87             : {
      88       36883 :         assert(blob != NULL);
      89       36883 :         assert(spdk_get_thread() == blob->bs->md_thread);
      90       36883 :         assert(blob->state != SPDK_BLOB_STATE_LOADING);
      91       36883 : }
      92             : 
      93             : static struct spdk_blob_list *
      94        3816 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
      95             : {
      96        3816 :         struct spdk_blob_list *snapshot_entry = NULL;
      97             : 
      98        4796 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
      99        1748 :                 if (snapshot_entry->id == blobid) {
     100         768 :                         break;
     101             :                 }
     102             :         }
     103             : 
     104        3816 :         return snapshot_entry;
     105             : }
     106             : 
     107             : static void
     108        2900 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
     109             : {
     110        2900 :         assert(spdk_spin_held(&bs->used_lock));
     111        2900 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     112        2900 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
     113             : 
     114        2900 :         spdk_bit_array_set(bs->used_md_pages, page);
     115        2900 : }
     116             : 
     117             : static void
     118        2196 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
     119             : {
     120        2196 :         assert(spdk_spin_held(&bs->used_lock));
     121        2196 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     122        2196 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
     123             : 
     124        2196 :         spdk_bit_array_clear(bs->used_md_pages, page);
     125        2196 : }
     126             : 
     127             : static uint32_t
     128        8220 : bs_claim_cluster(struct spdk_blob_store *bs)
     129             : {
     130             :         uint32_t cluster_num;
     131             : 
     132        8220 :         assert(spdk_spin_held(&bs->used_lock));
     133             : 
     134        8220 :         cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
     135        8220 :         if (cluster_num == UINT32_MAX) {
     136           0 :                 return UINT32_MAX;
     137             :         }
     138             : 
     139        8220 :         SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
     140        8220 :         bs->num_free_clusters--;
     141             : 
     142        8220 :         return cluster_num;
     143             : }
     144             : 
     145             : static void
     146        2399 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
     147             : {
     148        2399 :         assert(spdk_spin_held(&bs->used_lock));
     149        2399 :         assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
     150        2399 :         assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
     151        2399 :         assert(bs->num_free_clusters < bs->total_clusters);
     152             : 
     153        2399 :         SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
     154             : 
     155        2399 :         spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
     156        2399 :         bs->num_free_clusters++;
     157        2399 : }
     158             : 
     159             : static int
     160        8220 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
     161             : {
     162        8220 :         uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
     163             : 
     164        8220 :         blob_verify_md_op(blob);
     165             : 
     166        8220 :         if (*cluster_lba != 0) {
     167           4 :                 return -EEXIST;
     168             :         }
     169             : 
     170        8216 :         *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
     171        8216 :         blob->active.num_allocated_clusters++;
     172             : 
     173        8216 :         return 0;
     174             : }
     175             : 
     176             : static int
     177        8220 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
     178             :                     uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
     179             : {
     180        8220 :         uint32_t *extent_page = 0;
     181             : 
     182        8220 :         assert(spdk_spin_held(&blob->bs->used_lock));
     183             : 
     184        8220 :         *cluster = bs_claim_cluster(blob->bs);
     185        8220 :         if (*cluster == UINT32_MAX) {
     186             :                 /* No more free clusters. Cannot satisfy the request */
     187           0 :                 return -ENOSPC;
     188             :         }
     189             : 
     190        8220 :         if (blob->use_extent_table) {
     191        4168 :                 extent_page = bs_cluster_to_extent_page(blob, cluster_num);
     192        4168 :                 if (*extent_page == 0) {
     193             :                         /* Extent page shall never occupy md_page so start the search from 1 */
     194         728 :                         if (*lowest_free_md_page == 0) {
     195         726 :                                 *lowest_free_md_page = 1;
     196             :                         }
     197             :                         /* No extent_page is allocated for the cluster */
     198         728 :                         *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
     199             :                                                *lowest_free_md_page);
     200         728 :                         if (*lowest_free_md_page == UINT32_MAX) {
     201             :                                 /* No more free md pages. Cannot satisfy the request */
     202           0 :                                 bs_release_cluster(blob->bs, *cluster);
     203           0 :                                 return -ENOSPC;
     204             :                         }
     205         728 :                         bs_claim_md_page(blob->bs, *lowest_free_md_page);
     206             :                 }
     207             :         }
     208             : 
     209        8220 :         SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
     210             :                       blob->id);
     211             : 
     212        8220 :         if (update_map) {
     213        7404 :                 blob_insert_cluster(blob, cluster_num, *cluster);
     214        7404 :                 if (blob->use_extent_table && *extent_page == 0) {
     215         644 :                         *extent_page = *lowest_free_md_page;
     216             :                 }
     217             :         }
     218             : 
     219        8220 :         return 0;
     220             : }
     221             : 
     222             : static void
     223        5570 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
     224             : {
     225        5570 :         xattrs->count = 0;
     226        5570 :         xattrs->names = NULL;
     227        5570 :         xattrs->ctx = NULL;
     228        5570 :         xattrs->get_value = NULL;
     229        5570 : }
     230             : 
     231             : void
     232        3680 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
     233             : {
     234        3680 :         if (!opts) {
     235           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     236           0 :                 return;
     237             :         }
     238             : 
     239        3680 :         if (!opts_size) {
     240           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     241           0 :                 return;
     242             :         }
     243             : 
     244        3680 :         memset(opts, 0, opts_size);
     245        3680 :         opts->opts_size = opts_size;
     246             : 
     247             : #define FIELD_OK(field) \
     248             :         offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
     249             : 
     250             : #define SET_FIELD(field, value) \
     251             :         if (FIELD_OK(field)) { \
     252             :                 opts->field = value; \
     253             :         } \
     254             : 
     255        3680 :         SET_FIELD(num_clusters, 0);
     256        3680 :         SET_FIELD(thin_provision, false);
     257        3680 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     258             : 
     259        3680 :         if (FIELD_OK(xattrs)) {
     260        3680 :                 blob_xattrs_init(&opts->xattrs);
     261             :         }
     262             : 
     263        3680 :         SET_FIELD(use_extent_table, true);
     264             : 
     265             : #undef FIELD_OK
     266             : #undef SET_FIELD
     267             : }
     268             : 
     269             : void
     270        3470 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
     271             : {
     272        3470 :         if (!opts) {
     273           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     274           0 :                 return;
     275             :         }
     276             : 
     277        3470 :         if (!opts_size) {
     278           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     279           0 :                 return;
     280             :         }
     281             : 
     282        3470 :         memset(opts, 0, opts_size);
     283        3470 :         opts->opts_size = opts_size;
     284             : 
     285             : #define FIELD_OK(field) \
     286             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
     287             : 
     288             : #define SET_FIELD(field, value) \
     289             :         if (FIELD_OK(field)) { \
     290             :                 opts->field = value; \
     291             :         } \
     292             : 
     293        3470 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     294             : 
     295             : #undef FIELD_OK
     296             : #undef SET_FILED
     297             : }
     298             : 
     299             : static struct spdk_blob *
     300        5356 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
     301             : {
     302             :         struct spdk_blob *blob;
     303             : 
     304        5356 :         blob = calloc(1, sizeof(*blob));
     305        5356 :         if (!blob) {
     306           0 :                 return NULL;
     307             :         }
     308             : 
     309        5356 :         blob->id = id;
     310        5356 :         blob->bs = bs;
     311             : 
     312        5356 :         blob->parent_id = SPDK_BLOBID_INVALID;
     313             : 
     314        5356 :         blob->state = SPDK_BLOB_STATE_DIRTY;
     315        5356 :         blob->extent_rle_found = false;
     316        5356 :         blob->extent_table_found = false;
     317        5356 :         blob->active.num_pages = 1;
     318        5356 :         blob->active.pages = calloc(1, sizeof(*blob->active.pages));
     319        5356 :         if (!blob->active.pages) {
     320           0 :                 free(blob);
     321           0 :                 return NULL;
     322             :         }
     323             : 
     324        5356 :         blob->active.pages[0] = bs_blobid_to_page(id);
     325             : 
     326        5356 :         TAILQ_INIT(&blob->xattrs);
     327        5356 :         TAILQ_INIT(&blob->xattrs_internal);
     328        5356 :         TAILQ_INIT(&blob->pending_persists);
     329        5356 :         TAILQ_INIT(&blob->persists_to_complete);
     330             : 
     331        5356 :         return blob;
     332             : }
     333             : 
     334             : static void
     335       10712 : xattrs_free(struct spdk_xattr_tailq *xattrs)
     336             : {
     337             :         struct spdk_xattr       *xattr, *xattr_tmp;
     338             : 
     339       12466 :         TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
     340        1754 :                 TAILQ_REMOVE(xattrs, xattr, link);
     341        1754 :                 free(xattr->name);
     342        1754 :                 free(xattr->value);
     343        1754 :                 free(xattr);
     344             :         }
     345       10712 : }
     346             : 
     347             : static void
     348        5356 : blob_free(struct spdk_blob *blob)
     349             : {
     350        5356 :         assert(blob != NULL);
     351        5356 :         assert(TAILQ_EMPTY(&blob->pending_persists));
     352        5356 :         assert(TAILQ_EMPTY(&blob->persists_to_complete));
     353             : 
     354        5356 :         free(blob->active.extent_pages);
     355        5356 :         free(blob->clean.extent_pages);
     356        5356 :         free(blob->active.clusters);
     357        5356 :         free(blob->clean.clusters);
     358        5356 :         free(blob->active.pages);
     359        5356 :         free(blob->clean.pages);
     360             : 
     361        5356 :         xattrs_free(&blob->xattrs);
     362        5356 :         xattrs_free(&blob->xattrs_internal);
     363             : 
     364        5356 :         if (blob->back_bs_dev) {
     365        1080 :                 blob->back_bs_dev->destroy(blob->back_bs_dev);
     366             :         }
     367             : 
     368        5356 :         free(blob);
     369        5356 : }
     370             : 
     371             : static void
     372         320 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
     373             : {
     374         320 :         struct spdk_bs_dev      *bs_dev = ctx;
     375             : 
     376         320 :         if (bserrno != 0) {
     377             :                 /*
     378             :                  * This is probably due to a memory allocation failure when creating the
     379             :                  * blob_esnap_destroy_ctx before iterating threads.
     380             :                  */
     381           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
     382             :                             blob->id, bserrno);
     383           0 :                 assert(false);
     384             :         }
     385             : 
     386         320 :         if (bs_dev == NULL) {
     387             :                 /*
     388             :                  * This check exists to make scanbuild happy.
     389             :                  *
     390             :                  * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
     391             :                  * the blobstore is being loaded. It could also be NULL if there was an error
     392             :                  * opening the esnap device. In each of these cases, no channels could have been
     393             :                  * created because back_bs_dev->create_channel() would have led to a NULL pointer
     394             :                  * deref.
     395             :                  */
     396           0 :                 assert(false);
     397             :                 return;
     398             :         }
     399             : 
     400         320 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
     401         320 :         bs_dev->destroy(bs_dev);
     402             : }
     403             : 
     404             : static void
     405         320 : blob_back_bs_destroy(struct spdk_blob *blob)
     406             : {
     407         320 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
     408             :                       blob->id);
     409             : 
     410         320 :         blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
     411         320 :                                            blob->back_bs_dev);
     412         320 :         blob->back_bs_dev = NULL;
     413         320 : }
     414             : 
     415             : struct blob_parent {
     416             :         union {
     417             :                 struct {
     418             :                         spdk_blob_id id;
     419             :                         struct spdk_blob *blob;
     420             :                 } snapshot;
     421             : 
     422             :                 struct {
     423             :                         void *id;
     424             :                         uint32_t id_len;
     425             :                         struct spdk_bs_dev *back_bs_dev;
     426             :                 } esnap;
     427             :         } u;
     428             : };
     429             : 
     430             : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent);
     431             : 
     432             : struct set_bs_dev_ctx {
     433             :         struct spdk_blob        *blob;
     434             :         struct spdk_bs_dev      *back_bs_dev;
     435             : 
     436             :         /*
     437             :          * This callback is used during a set parent operation to change the references
     438             :          * to the parent of the blob.
     439             :          */
     440             :         set_parent_refs_cb      parent_refs_cb_fn;
     441             :         struct blob_parent      *parent_refs_cb_arg;
     442             : 
     443             :         spdk_blob_op_complete   cb_fn;
     444             :         void                    *cb_arg;
     445             :         int                     bserrno;
     446             : };
     447             : 
     448             : static void
     449          28 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
     450             :                      set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
     451             :                      spdk_blob_op_complete cb_fn, void *cb_arg)
     452             : {
     453             :         struct set_bs_dev_ctx   *ctx;
     454             : 
     455          28 :         ctx = calloc(1, sizeof(*ctx));
     456          28 :         if (ctx == NULL) {
     457           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
     458             :                             blob->id);
     459           0 :                 cb_fn(cb_arg, -ENOMEM);
     460           0 :                 return;
     461             :         }
     462             : 
     463          28 :         ctx->parent_refs_cb_fn = parent_refs_cb_fn;
     464          28 :         ctx->parent_refs_cb_arg = parent_refs_cb_arg;
     465          28 :         ctx->cb_fn = cb_fn;
     466          28 :         ctx->cb_arg = cb_arg;
     467          28 :         ctx->back_bs_dev = back_bs_dev;
     468          28 :         ctx->blob = blob;
     469             : 
     470          28 :         blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx);
     471             : }
     472             : 
     473             : struct freeze_io_ctx {
     474             :         struct spdk_bs_cpl cpl;
     475             :         struct spdk_blob *blob;
     476             : };
     477             : 
     478             : static void
     479         526 : blob_io_sync(struct spdk_io_channel_iter *i)
     480             : {
     481         526 :         spdk_for_each_channel_continue(i, 0);
     482         526 : }
     483             : 
     484             : static void
     485         514 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
     486             : {
     487         514 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
     488         514 :         struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
     489         514 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     490             :         struct spdk_bs_request_set      *set;
     491             :         struct spdk_bs_user_op_args     *args;
     492             :         spdk_bs_user_op_t *op, *tmp;
     493             : 
     494         518 :         TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
     495           4 :                 set = (struct spdk_bs_request_set *)op;
     496           4 :                 args = &set->u.user_op;
     497             : 
     498           4 :                 if (args->blob == ctx->blob) {
     499           4 :                         TAILQ_REMOVE(&ch->queued_io, op, link);
     500           4 :                         bs_user_op_execute(op);
     501             :                 }
     502             :         }
     503             : 
     504         514 :         spdk_for_each_channel_continue(i, 0);
     505         514 : }
     506             : 
     507             : static void
     508        1008 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
     509             : {
     510        1008 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     511             : 
     512        1008 :         ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
     513             : 
     514        1008 :         free(ctx);
     515        1008 : }
     516             : 
     517             : static void
     518         510 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     519             : {
     520             :         struct freeze_io_ctx *ctx;
     521             : 
     522         510 :         blob_verify_md_op(blob);
     523             : 
     524         510 :         ctx = calloc(1, sizeof(*ctx));
     525         510 :         if (!ctx) {
     526           0 :                 cb_fn(cb_arg, -ENOMEM);
     527           0 :                 return;
     528             :         }
     529             : 
     530         510 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     531         510 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     532         510 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     533         510 :         ctx->blob = blob;
     534             : 
     535             :         /* Freeze I/O on blob */
     536         510 :         blob->frozen_refcnt++;
     537             : 
     538         510 :         spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
     539             : }
     540             : 
     541             : static void
     542         498 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     543             : {
     544             :         struct freeze_io_ctx *ctx;
     545             : 
     546         498 :         blob_verify_md_op(blob);
     547             : 
     548         498 :         ctx = calloc(1, sizeof(*ctx));
     549         498 :         if (!ctx) {
     550           0 :                 cb_fn(cb_arg, -ENOMEM);
     551           0 :                 return;
     552             :         }
     553             : 
     554         498 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     555         498 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     556         498 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     557         498 :         ctx->blob = blob;
     558             : 
     559         498 :         assert(blob->frozen_refcnt > 0);
     560             : 
     561         498 :         blob->frozen_refcnt--;
     562             : 
     563         498 :         spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
     564             : }
     565             : 
     566             : static int
     567        8442 : blob_mark_clean(struct spdk_blob *blob)
     568             : {
     569        8442 :         uint32_t *extent_pages = NULL;
     570        8442 :         uint64_t *clusters = NULL;
     571        8442 :         uint32_t *pages = NULL;
     572             : 
     573        8442 :         assert(blob != NULL);
     574             : 
     575        8442 :         if (blob->active.num_extent_pages) {
     576        2845 :                 assert(blob->active.extent_pages);
     577        2845 :                 extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
     578        2845 :                 if (!extent_pages) {
     579           0 :                         return -ENOMEM;
     580             :                 }
     581        2845 :                 memcpy(extent_pages, blob->active.extent_pages,
     582        2845 :                        blob->active.num_extent_pages * sizeof(*extent_pages));
     583             :         }
     584             : 
     585        8442 :         if (blob->active.num_clusters) {
     586        5918 :                 assert(blob->active.clusters);
     587        5918 :                 clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
     588        5918 :                 if (!clusters) {
     589           0 :                         free(extent_pages);
     590           0 :                         return -ENOMEM;
     591             :                 }
     592        5918 :                 memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
     593             :         }
     594             : 
     595        8442 :         if (blob->active.num_pages) {
     596        6958 :                 assert(blob->active.pages);
     597        6958 :                 pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
     598        6958 :                 if (!pages) {
     599           0 :                         free(extent_pages);
     600           0 :                         free(clusters);
     601           0 :                         return -ENOMEM;
     602             :                 }
     603        6958 :                 memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
     604             :         }
     605             : 
     606        8442 :         free(blob->clean.extent_pages);
     607        8442 :         free(blob->clean.clusters);
     608        8442 :         free(blob->clean.pages);
     609             : 
     610        8442 :         blob->clean.num_extent_pages = blob->active.num_extent_pages;
     611        8442 :         blob->clean.extent_pages = blob->active.extent_pages;
     612        8442 :         blob->clean.num_clusters = blob->active.num_clusters;
     613        8442 :         blob->clean.clusters = blob->active.clusters;
     614        8442 :         blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
     615        8442 :         blob->clean.num_pages = blob->active.num_pages;
     616        8442 :         blob->clean.pages = blob->active.pages;
     617             : 
     618        8442 :         blob->active.extent_pages = extent_pages;
     619        8442 :         blob->active.clusters = clusters;
     620        8442 :         blob->active.pages = pages;
     621             : 
     622             :         /* If the metadata was dirtied again while the metadata was being written to disk,
     623             :          *  we do not want to revert the DIRTY state back to CLEAN here.
     624             :          */
     625        8442 :         if (blob->state == SPDK_BLOB_STATE_LOADING) {
     626        3402 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
     627             :         }
     628             : 
     629        8442 :         return 0;
     630             : }
     631             : 
     632             : static int
     633        1276 : blob_deserialize_xattr(struct spdk_blob *blob,
     634             :                        struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
     635             : {
     636             :         struct spdk_xattr                       *xattr;
     637             : 
     638        1276 :         if (desc_xattr->length != sizeof(desc_xattr->name_length) +
     639             :             sizeof(desc_xattr->value_length) +
     640        1276 :             desc_xattr->name_length + desc_xattr->value_length) {
     641           0 :                 return -EINVAL;
     642             :         }
     643             : 
     644        1276 :         xattr = calloc(1, sizeof(*xattr));
     645        1276 :         if (xattr == NULL) {
     646           0 :                 return -ENOMEM;
     647             :         }
     648             : 
     649        1276 :         xattr->name = malloc(desc_xattr->name_length + 1);
     650        1276 :         if (xattr->name == NULL) {
     651           0 :                 free(xattr);
     652           0 :                 return -ENOMEM;
     653             :         }
     654             : 
     655        1276 :         xattr->value = malloc(desc_xattr->value_length);
     656        1276 :         if (xattr->value == NULL) {
     657           0 :                 free(xattr->name);
     658           0 :                 free(xattr);
     659           0 :                 return -ENOMEM;
     660             :         }
     661             : 
     662        1276 :         memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
     663        1276 :         xattr->name[desc_xattr->name_length] = '\0';
     664        1276 :         xattr->value_len = desc_xattr->value_length;
     665        1276 :         memcpy(xattr->value,
     666        1276 :                (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
     667        1276 :                desc_xattr->value_length);
     668             : 
     669        1276 :         TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
     670             : 
     671        1276 :         return 0;
     672             : }
     673             : 
     674             : 
     675             : static int
     676        4580 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
     677             : {
     678             :         struct spdk_blob_md_descriptor *desc;
     679        4580 :         size_t  cur_desc = 0;
     680             :         void *tmp;
     681             : 
     682        4580 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
     683       13444 :         while (cur_desc < sizeof(page->descriptors)) {
     684       13444 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
     685        4532 :                         if (desc->length == 0) {
     686             :                                 /* If padding and length are 0, this terminates the page */
     687        4532 :                                 break;
     688             :                         }
     689        8912 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
     690             :                         struct spdk_blob_md_descriptor_flags    *desc_flags;
     691             : 
     692        3434 :                         desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
     693             : 
     694        3434 :                         if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
     695           0 :                                 return -EINVAL;
     696             :                         }
     697             : 
     698        3434 :                         if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
     699             :                             SPDK_BLOB_INVALID_FLAGS_MASK) {
     700           8 :                                 return -EINVAL;
     701             :                         }
     702             : 
     703        3426 :                         if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
     704             :                             SPDK_BLOB_DATA_RO_FLAGS_MASK) {
     705          12 :                                 blob->data_ro = true;
     706          12 :                                 blob->md_ro = true;
     707             :                         }
     708             : 
     709        3426 :                         if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
     710             :                             SPDK_BLOB_MD_RO_FLAGS_MASK) {
     711          12 :                                 blob->md_ro = true;
     712             :                         }
     713             : 
     714        3426 :                         if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
     715         562 :                                 blob->data_ro = true;
     716         562 :                                 blob->md_ro = true;
     717             :                         }
     718             : 
     719        3426 :                         blob->invalid_flags = desc_flags->invalid_flags;
     720        3426 :                         blob->data_ro_flags = desc_flags->data_ro_flags;
     721        3426 :                         blob->md_ro_flags = desc_flags->md_ro_flags;
     722             : 
     723        5478 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
     724             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
     725             :                         unsigned int                            i, j;
     726        1392 :                         unsigned int                            cluster_count = blob->active.num_clusters;
     727             : 
     728        1392 :                         if (blob->extent_table_found) {
     729             :                                 /* Extent Table already present in the md,
     730             :                                  * both descriptors should never be at the same time. */
     731           0 :                                 return -EINVAL;
     732             :                         }
     733        1392 :                         blob->extent_rle_found = true;
     734             : 
     735        1392 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
     736             : 
     737        1392 :                         if (desc_extent_rle->length == 0 ||
     738        1392 :                             (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
     739           0 :                                 return -EINVAL;
     740             :                         }
     741             : 
     742        2962 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     743       21238 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     744       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     745        6692 :                                                 if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
     746        6692 :                                                                                 desc_extent_rle->extents[i].cluster_idx + j)) {
     747           0 :                                                         return -EINVAL;
     748             :                                                 }
     749             :                                         }
     750       19668 :                                         cluster_count++;
     751             :                                 }
     752             :                         }
     753             : 
     754        1392 :                         if (cluster_count == 0) {
     755           0 :                                 return -EINVAL;
     756             :                         }
     757        1392 :                         tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
     758        1392 :                         if (tmp == NULL) {
     759           0 :                                 return -ENOMEM;
     760             :                         }
     761        1392 :                         blob->active.clusters = tmp;
     762        1392 :                         blob->active.cluster_array_size = cluster_count;
     763             : 
     764        2962 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     765       21238 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     766       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     767       13384 :                                                 blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     768        6692 :                                                                 desc_extent_rle->extents[i].cluster_idx + j);
     769        6692 :                                                 blob->active.num_allocated_clusters++;
     770       12976 :                                         } else if (spdk_blob_is_thin_provisioned(blob)) {
     771       12976 :                                                 blob->active.clusters[blob->active.num_clusters++] = 0;
     772             :                                         } else {
     773           0 :                                                 return -EINVAL;
     774             :                                         }
     775             :                                 }
     776             :                         }
     777        4086 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
     778             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
     779        1764 :                         uint32_t num_extent_pages = blob->active.num_extent_pages;
     780             :                         uint32_t i, j;
     781             :                         size_t extent_pages_length;
     782             : 
     783        1764 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
     784        1764 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
     785             : 
     786        1764 :                         if (blob->extent_rle_found) {
     787             :                                 /* This means that Extent RLE is present in MD,
     788             :                                  * both should never be at the same time. */
     789           0 :                                 return -EINVAL;
     790        1764 :                         } else if (blob->extent_table_found &&
     791           0 :                                    desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
     792             :                                 /* Number of clusters in this ET does not match number
     793             :                                  * from previously read EXTENT_TABLE. */
     794           0 :                                 return -EINVAL;
     795             :                         }
     796             : 
     797        1764 :                         if (desc_extent_table->length == 0 ||
     798        1764 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
     799           0 :                                 return -EINVAL;
     800             :                         }
     801             : 
     802        1764 :                         blob->extent_table_found = true;
     803             : 
     804        3238 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     805        1474 :                                 num_extent_pages += desc_extent_table->extent_page[i].num_pages;
     806             :                         }
     807             : 
     808        1764 :                         if (num_extent_pages > 0) {
     809        1458 :                                 tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
     810        1458 :                                 if (tmp == NULL) {
     811           0 :                                         return -ENOMEM;
     812             :                                 }
     813        1458 :                                 blob->active.extent_pages = tmp;
     814             :                         }
     815        1764 :                         blob->active.extent_pages_array_size = num_extent_pages;
     816             : 
     817        1764 :                         blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
     818             : 
     819             :                         /* Extent table entries contain md page numbers for extent pages.
     820             :                          * Zeroes represent unallocated extent pages, those are run-length-encoded.
     821             :                          */
     822        3238 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     823        1474 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
     824        1052 :                                         assert(desc_extent_table->extent_page[i].num_pages == 1);
     825        1052 :                                         blob->active.extent_pages[blob->active.num_extent_pages++] =
     826        1052 :                                                 desc_extent_table->extent_page[i].page_idx;
     827         422 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     828         844 :                                         for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
     829         422 :                                                 blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
     830             :                                         }
     831             :                                 } else {
     832           0 :                                         return -EINVAL;
     833             :                                 }
     834             :                         }
     835        2322 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
     836             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
     837             :                         unsigned int                                    i;
     838        1046 :                         unsigned int                                    cluster_count = 0;
     839             :                         size_t                                          cluster_idx_length;
     840             : 
     841        1046 :                         if (blob->extent_rle_found) {
     842             :                                 /* This means that Extent RLE is present in MD,
     843             :                                  * both should never be at the same time. */
     844           0 :                                 return -EINVAL;
     845             :                         }
     846             : 
     847        1046 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
     848        1046 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
     849             : 
     850        1046 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
     851        1046 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
     852           0 :                                 return -EINVAL;
     853             :                         }
     854             : 
     855       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     856       15298 :                                 if (desc_extent->cluster_idx[i] != 0) {
     857        6962 :                                         if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
     858           0 :                                                 return -EINVAL;
     859             :                                         }
     860             :                                 }
     861       15298 :                                 cluster_count++;
     862             :                         }
     863             : 
     864        1046 :                         if (cluster_count == 0) {
     865           0 :                                 return -EINVAL;
     866             :                         }
     867             : 
     868             :                         /* When reading extent pages sequentially starting cluster idx should match
     869             :                          * current size of a blob.
     870             :                          * If changed to batch reading, this check shall be removed. */
     871        1046 :                         if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
     872           0 :                                 return -EINVAL;
     873             :                         }
     874             : 
     875        1046 :                         tmp = realloc(blob->active.clusters,
     876        1046 :                                       (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
     877        1046 :                         if (tmp == NULL) {
     878           0 :                                 return -ENOMEM;
     879             :                         }
     880        1046 :                         blob->active.clusters = tmp;
     881        1046 :                         blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
     882             : 
     883       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     884       15298 :                                 if (desc_extent->cluster_idx[i] != 0) {
     885        6962 :                                         blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     886             :                                                         desc_extent->cluster_idx[i]);
     887        6962 :                                         blob->active.num_allocated_clusters++;
     888        8336 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     889        8336 :                                         blob->active.clusters[blob->active.num_clusters++] = 0;
     890             :                                 } else {
     891           0 :                                         return -EINVAL;
     892             :                                 }
     893             :                         }
     894        1046 :                         assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
     895        1046 :                         assert(blob->remaining_clusters_in_et >= cluster_count);
     896        1046 :                         blob->remaining_clusters_in_et -= cluster_count;
     897        1276 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
     898             :                         int rc;
     899             : 
     900         394 :                         rc = blob_deserialize_xattr(blob,
     901             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, false);
     902         394 :                         if (rc != 0) {
     903           0 :                                 return rc;
     904             :                         }
     905         882 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
     906             :                         int rc;
     907             : 
     908         882 :                         rc = blob_deserialize_xattr(blob,
     909             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, true);
     910         882 :                         if (rc != 0) {
     911           0 :                                 return rc;
     912             :                         }
     913             :                 } else {
     914             :                         /* Unrecognized descriptor type.  Do not fail - just continue to the
     915             :                          *  next descriptor.  If this descriptor is associated with some feature
     916             :                          *  defined in a newer version of blobstore, that version of blobstore
     917             :                          *  should create and set an associated feature flag to specify if this
     918             :                          *  blob can be loaded or not.
     919             :                          */
     920             :                 }
     921             : 
     922             :                 /* Advance to the next descriptor */
     923        8904 :                 cur_desc += sizeof(*desc) + desc->length;
     924        8904 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
     925          40 :                         break;
     926             :                 }
     927        8864 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
     928             :         }
     929             : 
     930        4572 :         return 0;
     931             : }
     932             : 
     933             : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
     934             : 
     935             : static int
     936        1046 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
     937             : {
     938        1046 :         assert(blob != NULL);
     939        1046 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     940             : 
     941        1046 :         if (bs_load_cur_extent_page_valid(extent_page) == false) {
     942           0 :                 return -ENOENT;
     943             :         }
     944             : 
     945        1046 :         return blob_parse_page(extent_page, blob);
     946             : }
     947             : 
     948             : static int
     949        3438 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
     950             :            struct spdk_blob *blob)
     951             : {
     952             :         const struct spdk_blob_md_page *page;
     953             :         uint32_t i;
     954             :         int rc;
     955             :         void *tmp;
     956             : 
     957        3438 :         assert(page_count > 0);
     958        3438 :         assert(pages[0].sequence_num == 0);
     959        3438 :         assert(blob != NULL);
     960        3438 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     961        3438 :         assert(blob->active.clusters == NULL);
     962             : 
     963             :         /* The blobid provided doesn't match what's in the MD, this can
     964             :          * happen for example if a bogus blobid is passed in through open.
     965             :          */
     966        3438 :         if (blob->id != pages[0].id) {
     967           4 :                 SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
     968             :                             "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
     969           4 :                 return -ENOENT;
     970             :         }
     971             : 
     972        3434 :         tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
     973        3434 :         if (!tmp) {
     974           0 :                 return -ENOMEM;
     975             :         }
     976        3434 :         blob->active.pages = tmp;
     977             : 
     978        3434 :         blob->active.pages[0] = pages[0].id;
     979             : 
     980        3534 :         for (i = 1; i < page_count; i++) {
     981         100 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
     982         100 :                 blob->active.pages[i] = pages[i - 1].next;
     983             :         }
     984        3434 :         blob->active.num_pages = page_count;
     985             : 
     986        6960 :         for (i = 0; i < page_count; i++) {
     987        3534 :                 page = &pages[i];
     988             : 
     989        3534 :                 assert(page->id == blob->id);
     990        3534 :                 assert(page->sequence_num == i);
     991             : 
     992        3534 :                 rc = blob_parse_page(page, blob);
     993        3534 :                 if (rc != 0) {
     994           8 :                         return rc;
     995             :                 }
     996             :         }
     997             : 
     998        3426 :         return 0;
     999             : }
    1000             : 
    1001             : static int
    1002        4350 : blob_serialize_add_page(const struct spdk_blob *blob,
    1003             :                         struct spdk_blob_md_page **pages,
    1004             :                         uint32_t *page_count,
    1005             :                         struct spdk_blob_md_page **last_page)
    1006             : {
    1007             :         struct spdk_blob_md_page *page, *tmp_pages;
    1008             : 
    1009        4350 :         assert(pages != NULL);
    1010        4350 :         assert(page_count != NULL);
    1011             : 
    1012        4350 :         *last_page = NULL;
    1013        4350 :         if (*page_count == 0) {
    1014        4262 :                 assert(*pages == NULL);
    1015        4262 :                 *pages = spdk_malloc(SPDK_BS_PAGE_SIZE, 0,
    1016             :                                      NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    1017        4262 :                 if (*pages == NULL) {
    1018           0 :                         return -ENOMEM;
    1019             :                 }
    1020        4262 :                 *page_count = 1;
    1021             :         } else {
    1022          88 :                 assert(*pages != NULL);
    1023          88 :                 tmp_pages = spdk_realloc(*pages, SPDK_BS_PAGE_SIZE * (*page_count + 1), 0);
    1024          88 :                 if (tmp_pages == NULL) {
    1025           0 :                         return -ENOMEM;
    1026             :                 }
    1027          88 :                 (*page_count)++;
    1028          88 :                 *pages = tmp_pages;
    1029             :         }
    1030             : 
    1031        4350 :         page = &(*pages)[*page_count - 1];
    1032        4350 :         memset(page, 0, sizeof(*page));
    1033        4350 :         page->id = blob->id;
    1034        4350 :         page->sequence_num = *page_count - 1;
    1035        4350 :         page->next = SPDK_INVALID_MD_PAGE;
    1036        4350 :         *last_page = page;
    1037             : 
    1038        4350 :         return 0;
    1039             : }
    1040             : 
    1041             : /* Encode the in-memory 'xattr' at 'buf' as an on-disk xattr descriptor: the descriptor struct, then the
    1042             :  * name bytes (no NUL terminator), then the value bytes. '*required_sz' is set to that total size on both
    1043             :  * success and failure. Returns 0 on success, or -1 with 'buf' untouched when 'buf_sz' is too small.
    1044             :  */
    1045             : static int
    1046        1771 : blob_serialize_xattr(const struct spdk_xattr *xattr,
    1047             :                      uint8_t *buf, size_t buf_sz,
    1048             :                      size_t *required_sz, bool internal) /* 'internal' selects the XATTR_INTERNAL descriptor type */
    1049             : {
    1050             :         struct spdk_blob_md_descriptor_xattr    *desc;
    1051             : 
    1052        1771 :         *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
    1053        1771 :                        strlen(xattr->name) +
    1054        1771 :                        xattr->value_len;
    1055             :         /* The size is reported before the check below, so the caller gets it even on failure. */
    1056        1771 :         if (buf_sz < *required_sz) {
    1057          48 :                 return -1;
    1058             :         }
    1059             : 
    1060        1723 :         desc = (struct spdk_blob_md_descriptor_xattr *)buf;
    1061             : 
    1062        1723 :         desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
    1063        1723 :         desc->length = sizeof(desc->name_length) + /* the two length fields plus the name and value bytes */
    1064             :                        sizeof(desc->value_length) +
    1065        1723 :                        strlen(xattr->name) +
    1066        1723 :                        xattr->value_len;
    1067        1723 :         desc->name_length = strlen(xattr->name);
    1068        1723 :         desc->value_length = xattr->value_len;
    1069             :         /* The value bytes are stored immediately after the name bytes. */
    1070        1723 :         memcpy(desc->name, xattr->name, desc->name_length);
    1071        1723 :         memcpy((void *)((uintptr_t)desc->name + desc->name_length),
    1072        1723 :                xattr->value,
    1073        1723 :                desc->value_length);
    1074             : 
    1075        1723 :         return 0;
    1076             : }
    1077             : 
    1078             : static void /* Writes one EXTENT_TABLE descriptor at '*buf', covering extent pages from 'start_ep' onward. */
    1079        1685 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
    1080             :                                   uint64_t start_ep, uint64_t *next_ep, /* out: index of the first extent page not serialized */
    1081             :                                   uint8_t **buf, size_t *remaining_sz) /* in/out: advanced / reduced by the bytes written */
    1082             : {
    1083             :         struct spdk_blob_md_descriptor_extent_table *desc;
    1084             :         size_t cur_sz;
    1085             :         uint64_t i, et_idx;
    1086             :         uint32_t extent_page, ep_len;
    1087             : 
    1088             :         /* The buffer must have room for at least num_clusters entry */
    1089        1685 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
    1090        1685 :         if (*remaining_sz < cur_sz) {
    1091          20 :                 *next_ep = start_ep; /* nothing is written; '*buf' and '*remaining_sz' stay as they were */
    1092          20 :                 return;
    1093             :         }
    1094             : 
    1095        1665 :         desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
    1096        1665 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
    1097             : 
    1098        1665 :         desc->num_clusters = blob->active.num_clusters;
    1099             :         /* ep_len is the length of the run being built; et_idx counts the entries written so far. */
    1100        1665 :         ep_len = 1;
    1101        1665 :         et_idx = 0;
    1102        4236 :         for (i = start_ep; i < blob->active.num_extent_pages; i++) {
    1103        2571 :                 if (*remaining_sz < cur_sz  + sizeof(desc->extent_page[0])) {
    1104             :                         /* If we ran out of buffer space, return */
    1105           0 :                         break;
    1106             :                 }
    1107             : 
    1108        2571 :                 extent_page = blob->active.extent_pages[i];
    1109             :                 /* Verify that next extent_page is unallocated */
    1110        2571 :                 if (extent_page == 0 &&
    1111        1518 :                     (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
    1112        1078 :                         ep_len++; /* extend the current run of zero-valued extent pages */
    1113        1078 :                         continue;
    1114             :                 }
    1115        1493 :                 desc->extent_page[et_idx].page_idx = extent_page;
    1116        1493 :                 desc->extent_page[et_idx].num_pages = ep_len;
    1117        1493 :                 et_idx++;
    1118             :                 /* Entry written: start a new run and account for the space the entry used. */
    1119        1493 :                 ep_len = 1;
    1120        1493 :                 cur_sz += sizeof(desc->extent_page[et_idx]);
    1121             :         }
    1122        1665 :         *next_ep = i;
    1123             :         /* 'length' leaves out sizeof(struct spdk_blob_md_descriptor), which is added back when advancing. */
    1124        1665 :         desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
    1125        1665 :         *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1126        1665 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1127             : }
    1128             : 
    1129             : static int /* Serializes the blob's extent table, adding a metadata page whenever the entry helper stops early. */
    1130        1667 : blob_serialize_extent_table(const struct spdk_blob *blob,
    1131             :                             struct spdk_blob_md_page **pages,
    1132             :                             struct spdk_blob_md_page *cur_page, /* by value: page switches are not reported back to the caller */
    1133             :                             uint32_t *page_count, uint8_t **buf,
    1134             :                             size_t *remaining_sz)
    1135             : { /* Returns 0, or the negative value from blob_serialize_add_page(). */
    1136        1667 :         uint64_t                                last_extent_page;
    1137             :         int                                     rc;
    1138             : 
    1139        1667 :         last_extent_page = 0;
    1140             :         /* At least single extent table entry has to be always persisted.
    1141             :          * Such case occurs with num_extent_pages == 0. */
    1142        1685 :         while (last_extent_page <= blob->active.num_extent_pages) {
    1143        1685 :                 blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
    1144             :                                                   remaining_sz);
    1145             :                 /* NOTE(review): with num_extent_pages == 0, a call that finds no room also leaves last_extent_page == 0 and exits here with nothing written - confirm this cannot happen. */
    1146        1685 :                 if (last_extent_page == blob->active.num_extent_pages) {
    1147        1667 :                         break;
    1148             :                 }
    1149             :                 /* Not finished: the entry helper stopped early, so continue on a newly added page. */
    1150          18 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1151          18 :                 if (rc < 0) {
    1152           0 :                         return rc;
    1153             :                 }
    1154             :                 /* Restart the write position at the beginning of the new page's descriptor area. */
    1155          18 :                 *buf = (uint8_t *)cur_page->descriptors;
    1156          18 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1157             :         }
    1158             : 
    1159        1667 :         return 0;
    1160             : }
    1161             : 
    1162             : static void /* Writes one EXTENT_RLE descriptor at '*buf' for clusters from 'start_cluster' onward. */
    1163        1737 : blob_serialize_extent_rle(const struct spdk_blob *blob,
    1164             :                           uint64_t start_cluster, uint64_t *next_cluster, /* out: first cluster not covered by this descriptor */
    1165             :                           uint8_t **buf, size_t *buf_sz) /* in/out: advanced / reduced by the bytes written */
    1166             : {
    1167             :         struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
    1168             :         size_t cur_sz;
    1169             :         uint64_t i, extent_idx;
    1170             :         uint64_t lba, lba_per_cluster, lba_count;
    1171             : 
    1172             :         /* The buffer must have room for at least one extent */
    1173        1737 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
    1174        1737 :         if (*buf_sz < cur_sz) {
    1175          18 :                 *next_cluster = start_cluster; /* nothing is written; '*buf' and '*buf_sz' stay as they were */
    1176          18 :                 return;
    1177             :         }
    1178             : 
    1179        1719 :         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
    1180        1719 :         desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
    1181             : 
    1182        1719 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1183             :         /* Assert for scan-build false positive */
    1184        1719 :         assert(lba_per_cluster > 0);
    1185             :         /* 'lba' is the first LBA of the current run and 'lba_count' is the run's length in LBAs. */
    1186        1719 :         lba = blob->active.clusters[start_cluster];
    1187        1719 :         lba_count = lba_per_cluster;
    1188        1719 :         extent_idx = 0;
    1189      810450 :         for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
    1190      808735 :                 if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
    1191             :                         /* Run-length encode sequential non-zero LBA */
    1192        7276 :                         lba_count += lba_per_cluster;
    1193        7276 :                         continue;
    1194      801459 :                 } else if (lba == 0 && blob->active.clusters[i] == 0) {
    1195             :                         /* Run-length encode unallocated clusters */
    1196      800266 :                         lba_count += lba_per_cluster;
    1197      800266 :                         continue;
    1198             :                 }
    1199        1193 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1200        1193 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1201        1193 :                 extent_idx++;
    1202             :                 /* Reserve room for the next extent before starting it. */
    1203        1193 :                 cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
    1204             : 
    1205        1193 :                 if (*buf_sz < cur_sz) {
    1206             :                         /* If we ran out of buffer space, return */
    1207           4 :                         *next_cluster = i;
    1208           4 :                         break;
    1209             :                 }
    1210             :                 /* Start a new run at cluster i. */
    1211        1189 :                 lba = blob->active.clusters[i];
    1212        1189 :                 lba_count = lba_per_cluster;
    1213             :         }
    1214             :         /* Flush the final run, unless the loop stopped because the buffer is full. */
    1215        1719 :         if (*buf_sz >= cur_sz) {
    1216        1715 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1217        1715 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1218        1715 :                 extent_idx++;
    1219             : 
    1220        1715 :                 *next_cluster = blob->active.num_clusters;
    1221             :         }
    1222             :         /* 'length' leaves out sizeof(struct spdk_blob_md_descriptor), which is added back when advancing. */
    1223        1719 :         desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
    1224        1719 :         *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1225        1719 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1226             : }
    1227             : 
    1228             : static int /* Serializes all clusters as EXTENT_RLE descriptors, adding a metadata page whenever one fills up. */
    1229        1929 : blob_serialize_extents_rle(const struct spdk_blob *blob,
    1230             :                            struct spdk_blob_md_page **pages,
    1231             :                            struct spdk_blob_md_page *cur_page, /* by value: page switches are not reported back to the caller */
    1232             :                            uint32_t *page_count, uint8_t **buf,
    1233             :                            size_t *remaining_sz)
    1234             : { /* Returns 0, or the negative value from blob_serialize_add_page(). */
    1235        1929 :         uint64_t                                last_cluster;
    1236             :         int                                     rc;
    1237             : 
    1238        1929 :         last_cluster = 0; /* with num_clusters == 0 the loop below does not run and nothing is written */
    1239        1951 :         while (last_cluster < blob->active.num_clusters) {
    1240        1737 :                 blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
    1241             : 
    1242        1737 :                 if (last_cluster == blob->active.num_clusters) {
    1243        1715 :                         break;
    1244             :                 }
    1245             :                 /* Not finished: the helper stopped early, so continue on a newly added page. */
    1246          22 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1247          22 :                 if (rc < 0) {
    1248           0 :                         return rc;
    1249             :                 }
    1250             :                 /* Restart the write position at the beginning of the new page's descriptor area. */
    1251          22 :                 *buf = (uint8_t *)cur_page->descriptors;
    1252          22 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1253             :         }
    1254             : 
    1255        1929 :         return 0;
    1256             : }
    1257             : 
    1258             : static void /* Fills 'page' with a single EXTENT_PAGE descriptor for the extent page that contains 'cluster'. */
    1259        1100 : blob_serialize_extent_page(const struct spdk_blob *blob,
    1260             :                            uint64_t cluster, struct spdk_blob_md_page *page)
    1261             : {
    1262             :         struct spdk_blob_md_descriptor_extent_page *desc_extent;
    1263             :         uint64_t i, extent_idx;
    1264             :         uint64_t lba, lba_per_cluster;
    1265        1100 :         uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP; /* rounded down to a multiple of SPDK_EXTENTS_PER_EP */
    1266             : 
    1267        1100 :         desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
    1268        1100 :         desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
    1269             : 
    1270        1100 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1271             :         /* Store one cluster index per cluster until the blob ends or SPDK_EXTENTS_PER_EP entries are written. */
    1272        1100 :         desc_extent->start_cluster_idx = start_cluster_idx;
    1273        1100 :         extent_idx = 0;
    1274       42406 :         for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
    1275       41372 :                 lba = blob->active.clusters[i];
    1276       41372 :                 desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
    1277       41372 :                 if (extent_idx >= SPDK_EXTENTS_PER_EP) {
    1278          66 :                         break;
    1279             :                 }
    1280             :         }
    1281        1100 :         desc_extent->length = sizeof(desc_extent->start_cluster_idx) + /* the start index plus the entries written */
    1282             :                               sizeof(desc_extent->cluster_idx[0]) * extent_idx;
    1283        1100 : }
    1284             : 
    1285             : static void /* Writes the FLAGS descriptor at 'buf' and subtracts its size from '*buf_sz'. */
    1286        3596 : blob_serialize_flags(const struct spdk_blob *blob,
    1287             :                      uint8_t *buf, size_t *buf_sz) /* 'buf' is passed by value, so the caller advances its own pointer */
    1288             : {
    1289             :         struct spdk_blob_md_descriptor_flags *desc;
    1290             : 
    1291             :         /*
    1292             :          * Flags get serialized first, so we should always have room for the flags
    1293             :          *  descriptor.
    1294             :          */
    1295        3596 :         assert(*buf_sz >= sizeof(*desc));
    1296             : 
    1297        3596 :         desc = (struct spdk_blob_md_descriptor_flags *)buf;
    1298        3596 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
    1299        3596 :         desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor); /* size of the flags descriptor minus sizeof(struct spdk_blob_md_descriptor) */
    1300        3596 :         desc->invalid_flags = blob->invalid_flags;
    1301        3596 :         desc->data_ro_flags = blob->data_ro_flags;
    1302        3596 :         desc->md_ro_flags = blob->md_ro_flags;
    1303             : 
    1304        3596 :         *buf_sz -= sizeof(*desc);
    1305        3596 : }
    1306             : 
    1307             : static int /* Serializes every xattr in 'xattrs', moving to a newly added page when one does not fit. */
    1308        7192 : blob_serialize_xattrs(const struct spdk_blob *blob,
    1309             :                       const struct spdk_xattr_tailq *xattrs, bool internal, /* 'internal' is forwarded to blob_serialize_xattr() */
    1310             :                       struct spdk_blob_md_page **pages,
    1311             :                       struct spdk_blob_md_page *cur_page, /* by value: page switches are not reported back to the caller */
    1312             :                       uint32_t *page_count, uint8_t **buf,
    1313             :                       size_t *remaining_sz)
    1314             : { /* Returns 0 on success; on failure returns a negative value after freeing '*pages' and zeroing '*page_count'. */
    1315             :         const struct spdk_xattr *xattr;
    1316             :         int     rc;
    1317             : 
    1318        8915 :         TAILQ_FOREACH(xattr, xattrs, link) {
    1319        1723 :                 size_t required_sz = 0;
    1320             : 
    1321        1723 :                 rc = blob_serialize_xattr(xattr,
    1322             :                                           *buf, *remaining_sz,
    1323             :                                           &required_sz, internal);
    1324        1723 :                 if (rc < 0) {
    1325             :                         /* Need to add a new page to the chain */
    1326          48 :                         rc = blob_serialize_add_page(blob, pages, page_count,
    1327             :                                                      &cur_page);
    1328          48 :                         if (rc < 0) {
    1329           0 :                                 spdk_free(*pages);
    1330           0 :                                 *pages = NULL;
    1331           0 :                                 *page_count = 0;
    1332           0 :                                 return rc;
    1333             :                         }
    1334             : 
    1335          48 :                         *buf = (uint8_t *)cur_page->descriptors;
    1336          48 :                         *remaining_sz = sizeof(cur_page->descriptors);
    1337             : 
    1338             :                         /* Try again */
    1339          48 :                         required_sz = 0;
    1340          48 :                         rc = blob_serialize_xattr(xattr,
    1341             :                                                   *buf, *remaining_sz,
    1342             :                                                   &required_sz, internal);
    1343             :                         /* A second failure means the xattr does not fit even in an empty descriptor area: give up. */
    1344          48 :                         if (rc < 0) {
    1345           0 :                                 spdk_free(*pages);
    1346           0 :                                 *pages = NULL;
    1347           0 :                                 *page_count = 0;
    1348           0 :                                 return rc;
    1349             :                         }
    1350             :                 }
    1351             :                 /* Advance past the descriptor that was just written. */
    1352        1723 :                 *remaining_sz -= required_sz;
    1353        1723 :                 *buf += required_sz;
    1354             :         }
    1355             : 
    1356        7192 :         return 0;
    1357             : }
    1358             : 
    1359             : static int /* Serializes 'blob' into a newly allocated array of metadata pages returned through '*pages'. */
    1360        3596 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
    1361             :                uint32_t *page_count) /* out: number of pages in '*pages' */
    1362             : { /* Returns 0 on success or the negative value from the step that failed. */
    1363        3596 :         struct spdk_blob_md_page                *cur_page;
    1364             :         int                                     rc;
    1365        3596 :         uint8_t                                 *buf;
    1366        3596 :         size_t                                  remaining_sz;
    1367             : 
    1368        3596 :         assert(pages != NULL);
    1369        3596 :         assert(page_count != NULL);
    1370        3596 :         assert(blob != NULL);
    1371        3596 :         assert(blob->state == SPDK_BLOB_STATE_DIRTY);
    1372             : 
    1373        3596 :         *pages = NULL;
    1374        3596 :         *page_count = 0;
    1375             : 
    1376             :         /* A blob always has at least 1 page, even if it has no descriptors */
    1377        3596 :         rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1378        3596 :         if (rc < 0) {
    1379           0 :                 return rc;
    1380             :         }
    1381             : 
    1382        3596 :         buf = (uint8_t *)cur_page->descriptors;
    1383        3596 :         remaining_sz = sizeof(cur_page->descriptors);
    1384             : 
    1385             :         /* Serialize flags */
    1386        3596 :         blob_serialize_flags(blob, buf, &remaining_sz);
    1387        3596 :         buf += sizeof(struct spdk_blob_md_descriptor_flags); /* blob_serialize_flags() takes 'buf' by value, so advance it here */
    1388             :         /* cur_page is passed by value below; only buf and remaining_sz carry the write position between steps. */
    1389             :         /* Serialize xattrs */
    1390        3596 :         rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
    1391             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1392        3596 :         if (rc < 0) {
    1393           0 :                 return rc;
    1394             :         }
    1395             : 
    1396             :         /* Serialize internal xattrs */
    1397        3596 :         rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
    1398             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1399        3596 :         if (rc < 0) {
    1400           0 :                 return rc;
    1401             :         }
    1402             :         /* Cluster mapping comes last, in the format selected by blob->use_extent_table. */
    1403        3596 :         if (blob->use_extent_table) {
    1404             :                 /* Serialize extent table */
    1405        1667 :                 rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1406             :         } else {
    1407             :                 /* Serialize extents */
    1408        1929 :                 rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1409             :         }
    1410             : 
    1411        3596 :         return rc;
    1412             : }
    1413             : 
    1414             : struct spdk_blob_load_ctx { /* State carried through the blob_load_*() callbacks in this file. */
    1415             :         struct spdk_blob                *blob; /* blob being loaded */
    1416             : 
    1417             :         struct spdk_blob_md_page        *pages; /* page buffer; released with spdk_free() in blob_load_final() */
    1418             :         uint32_t                        num_pages; /* presumably the number of pages in 'pages' - confirm against the code that fills it */
    1419             :         uint32_t                        next_extent_page; /* index in active.extent_pages where blob_load_cpl_extents_cpl() resumes */
    1420             :         spdk_bs_sequence_t              *seq; /* sequence handed back to cb_fn */
    1421             : 
    1422             :         spdk_bs_sequence_cpl            cb_fn; /* completion callback, invoked from blob_load_final() */
    1423             :         void                            *cb_arg; /* opaque argument passed to cb_fn */
    1424             : };
    1425             : 
    1426             : static uint32_t /* Returns the CRC-32C of the first SPDK_BS_PAGE_SIZE - 4 bytes of 'page'. */
    1427       19930 : blob_md_page_calc_crc(void *page)
    1428             : {
    1429             :         uint32_t                crc;
    1430             :         /* The CRC is seeded with BLOB_CRC32C_INITIAL and the result is XORed with the same constant. */
    1431       19930 :         crc = BLOB_CRC32C_INITIAL;
    1432       19930 :         crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc); /* NOTE(review): the 4 excluded bytes presumably hold the stored crc field - confirm */
    1433       19930 :         crc ^= BLOB_CRC32C_INITIAL;
    1434             : 
    1435       19930 :         return crc;
    1436             : 
    1437             : }
    1438             : 
    1439             : static void /* Completes a blob load: reports 'bserrno' through ctx->cb_fn, then releases the context. */
    1440        3466 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
    1441             : {
    1442        3466 :         struct spdk_blob                *blob = ctx->blob;
    1443             :         /* Only a successfully loaded blob is marked clean. */
    1444        3466 :         if (bserrno == 0) {
    1445        3402 :                 blob_mark_clean(blob);
    1446             :         }
    1447             : 
    1448        3466 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
    1449             : 
    1450             :         /* Free the page buffer and the context, after the callback has run */
    1451        3466 :         spdk_free(ctx->pages);
    1452        3466 :         free(ctx);
    1453        3466 : }
    1454             : 
    1455             : static void /* spdk_bs_open_blob() completion used by blob_load_backing_dev() for the parent snapshot. */
    1456         454 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    1457             : {
    1458         454 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1459         454 :         struct spdk_blob                *blob = ctx->blob;
    1460             :         /* On success, wrap the opened snapshot in a bs_dev and use it as the blob's backing device. */
    1461         454 :         if (bserrno == 0) {
    1462         448 :                 blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
    1463         448 :                 if (blob->back_bs_dev == NULL) {
    1464           0 :                         bserrno = -ENOMEM; /* a NULL device is reported as -ENOMEM */
    1465             :                 }
    1466             :         }
    1467         454 :         if (bserrno != 0) {
    1468           6 :                 SPDK_ERRLOG("Snapshot fail\n");
    1469             :         }
    1470             :         /* Finish the load with whatever status resulted. */
    1471         454 :         blob_load_final(ctx, bserrno);
    1472         454 : }
    1473             : 
    1474             : static void blob_update_clear_method(struct spdk_blob *blob); /* forward declaration of a file-local function */
    1475             : 
    1476             : static int /* Creates the external-snapshot backing device of an esnap clone through bs->esnap_bs_dev_create. */
    1477         120 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx) /* 'blob_ctx' is forwarded to esnap_bs_dev_create() */
    1478             : { /* Returns 0 on success; -ENOTSUP, -EINVAL or the callback's non-zero result otherwise. */
    1479         120 :         struct spdk_blob_store *bs = blob->bs;
    1480         120 :         struct spdk_bs_dev *bs_dev = NULL;
    1481         120 :         const void *esnap_id = NULL;
    1482         120 :         size_t id_len = 0;
    1483             :         int rc;
    1484             :         /* The blobstore must have been given a callback that can create esnap devices. */
    1485         120 :         if (bs->esnap_bs_dev_create == NULL) {
    1486           8 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
    1487             :                                "without support for esnap clones\n", blob->id);
    1488           8 :                 return -ENOTSUP;
    1489             :         }
    1490         112 :         assert(blob->back_bs_dev == NULL);
    1491             :         /* The esnap ID is read from the BLOB_EXTERNAL_SNAPSHOT_ID xattr. */
    1492         112 :         rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
    1493         112 :         if (rc != 0) {
    1494           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
    1495           0 :                 return -EINVAL;
    1496             :         }
    1497         112 :         assert(id_len > 0 && id_len < UINT32_MAX); /* id_len is narrowed to uint32_t in the call below */
    1498             : 
    1499         112 :         SPDK_INFOLOG(blob, "Creating external snapshot device\n");
    1500             : 
    1501         112 :         rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
    1502             :                                      &bs_dev);
    1503         112 :         if (rc != 0) {
    1504           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
    1505             :                               "with error %d\n", blob->id, rc);
    1506           0 :                 return rc;
    1507             :         }
    1508             : 
    1509             :         /*
    1510             :          * Note: bs_dev might be NULL if the consumer chose to not open the external snapshot.
    1511             :          * This especially might happen during spdk_bs_load() iteration.
    1512             :          */
    1513         112 :         if (bs_dev != NULL) {
    1514         112 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
    1515         112 :                 if ((bs->io_unit_size % bs_dev->blocklen) != 0) { /* io_unit_size must be a multiple of the device block size */
    1516           4 :                         SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
    1517             :                                        "is not compatible with blobstore block size %u\n",
    1518             :                                        blob->id, bs_dev->blocklen, bs->io_unit_size);
    1519           4 :                         bs_dev->destroy(bs_dev); /* release the device that will not be used */
    1520           4 :                         return -EINVAL;
    1521             :                 }
    1522             :         }
    1523             :         /* bs_dev may still be NULL here; parent_id is set to SPDK_BLOBID_EXTERNAL_SNAPSHOT either way. */
    1524         108 :         blob->back_bs_dev = bs_dev;
    1525         108 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    1526             : 
    1527         108 :         return 0;
    1528             : }
    1529             : 
    1530             : static void /* Selects the blob's backing device and finishes the load, directly or via blob_load_snapshot_cpl(). */
    1531        3420 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg) /* 'cb_arg' is the struct spdk_blob_load_ctx */
    1532             : {
    1533        3420 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1534        3420 :         struct spdk_blob                *blob = ctx->blob;
    1535        3420 :         const void                      *value;
    1536        3420 :         size_t                          len;
    1537             :         int                             rc;
    1538             :         /* Esnap clones get their backing device from blob_load_esnap(); its result ends the load. */
    1539        3420 :         if (blob_is_esnap_clone(blob)) {
    1540         120 :                 rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
    1541         120 :                 blob_load_final(ctx, rc);
    1542         120 :                 return;
    1543             :         }
    1544             :         /* A thin-provisioned blob is backed by its parent snapshot if the BLOB_SNAPSHOT xattr exists, otherwise by the zeroes device. */
    1545        3300 :         if (spdk_blob_is_thin_provisioned(blob)) {
    1546        1030 :                 rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
    1547        1030 :                 if (rc == 0) {
    1548         454 :                         if (len != sizeof(spdk_blob_id)) { /* the xattr must hold exactly one blob ID */
    1549           0 :                                 blob_load_final(ctx, -EINVAL);
    1550           0 :                                 return;
    1551             :                         }
    1552             :                         /* open snapshot blob and continue in the callback function */
    1553         454 :                         blob->parent_id = *(spdk_blob_id *)value;
    1554         454 :                         spdk_bs_open_blob(blob->bs, blob->parent_id,
    1555             :                                           blob_load_snapshot_cpl, ctx);
    1556         454 :                         return;
    1557             :                 } else {
    1558             :                         /* add zeroes_dev for thin provisioned blob */
    1559         576 :                         blob->back_bs_dev = bs_create_zeroes_dev();
    1560             :                 }
    1561             :         } else {
    1562             :                 /* standard blob */
    1563        2270 :                 blob->back_bs_dev = NULL;
    1564             :         }
    1565        2846 :         blob_load_final(ctx, 0);
    1566             : }
    1567             : 
    1568             : static void
    1569        2816 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1570             : {
    1571        2816 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1572        2816 :         struct spdk_blob                *blob = ctx->blob;
    1573             :         struct spdk_blob_md_page        *page;
    1574             :         uint64_t                        i;
    1575             :         uint32_t                        crc;
    1576             :         uint64_t                        lba;
    1577             :         void                            *tmp;
    1578             :         uint64_t                        sz;
    1579             : 
    1580        2816 :         if (bserrno) {
    1581           6 :                 SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
    1582           6 :                 blob_load_final(ctx, bserrno);
    1583           6 :                 return;
    1584             :         }
    1585             : 
    1586        2810 :         if (ctx->pages == NULL) {
    1587             :                 /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
    1588        1764 :                 ctx->pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    1589             :                                           NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    1590        1764 :                 if (!ctx->pages) {
    1591           0 :                         blob_load_final(ctx, -ENOMEM);
    1592           0 :                         return;
    1593             :                 }
    1594        1764 :                 ctx->num_pages = 1;
    1595        1764 :                 ctx->next_extent_page = 0;
    1596             :         } else {
    1597        1046 :                 page = &ctx->pages[0];
    1598        1046 :                 crc = blob_md_page_calc_crc(page);
    1599        1046 :                 if (crc != page->crc) {
    1600           0 :                         blob_load_final(ctx, -EINVAL);
    1601           0 :                         return;
    1602             :                 }
    1603             : 
    1604        1046 :                 if (page->next != SPDK_INVALID_MD_PAGE) {
    1605           0 :                         blob_load_final(ctx, -EINVAL);
    1606           0 :                         return;
    1607             :                 }
    1608             : 
    1609        1046 :                 bserrno = blob_parse_extent_page(page, blob);
    1610        1046 :                 if (bserrno) {
    1611           0 :                         blob_load_final(ctx, bserrno);
    1612           0 :                         return;
    1613             :                 }
    1614             :         }
    1615             : 
    1616        3232 :         for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
    1617        1474 :                 if (blob->active.extent_pages[i] != 0) {
    1618             :                         /* Extent page was allocated, read and parse it. */
    1619        1052 :                         lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
    1620        1052 :                         ctx->next_extent_page = i + 1;
    1621             : 
    1622        1052 :                         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1623        1052 :                                              bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    1624             :                                              blob_load_cpl_extents_cpl, ctx);
    1625        1052 :                         return;
    1626             :                 } else {
    1627             :                         /* Thin provisioned blobs can point to unallocated extent pages.
    1628             :                          * In this case blob size should be increased by up to the amount left in remaining_clusters_in_et. */
    1629             : 
    1630         422 :                         sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
    1631         422 :                         blob->active.num_clusters += sz;
    1632         422 :                         blob->remaining_clusters_in_et -= sz;
    1633             : 
    1634         422 :                         assert(spdk_blob_is_thin_provisioned(blob));
    1635         422 :                         assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
    1636             : 
    1637         422 :                         tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
    1638         422 :                         if (tmp == NULL) {
    1639           0 :                                 blob_load_final(ctx, -ENOMEM);
    1640           0 :                                 return;
    1641             :                         }
    1642         422 :                         memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
    1643         422 :                                sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
    1644         422 :                         blob->active.clusters = tmp;
    1645         422 :                         blob->active.cluster_array_size = blob->active.num_clusters;
    1646             :                 }
    1647             :         }
    1648             : 
    1649        1758 :         blob_load_backing_dev(seq, ctx);
    1650             : }
    1651             : 
    1652             : static void
    1653        3566 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1654             : {
    1655        3566 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1656        3566 :         struct spdk_blob                *blob = ctx->blob;
    1657             :         struct spdk_blob_md_page        *page;
    1658             :         int                             rc;
    1659             :         uint32_t                        crc;
    1660             :         uint32_t                        current_page;
    1661             : 
    1662        3566 :         if (ctx->num_pages == 1) {
    1663        3466 :                 current_page = bs_blobid_to_page(blob->id);
    1664             :         } else {
    1665         100 :                 assert(ctx->num_pages != 0);
    1666         100 :                 page = &ctx->pages[ctx->num_pages - 2];
    1667         100 :                 current_page = page->next;
    1668             :         }
    1669             : 
    1670        3566 :         if (bserrno) {
    1671          20 :                 SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
    1672             :                             current_page, blob->id, bserrno);
    1673          20 :                 blob_load_final(ctx, bserrno);
    1674          20 :                 return;
    1675             :         }
    1676             : 
    1677        3546 :         page = &ctx->pages[ctx->num_pages - 1];
    1678        3546 :         crc = blob_md_page_calc_crc(page);
    1679        3546 :         if (crc != page->crc) {
    1680           8 :                 SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
    1681             :                             current_page, blob->id);
    1682           8 :                 blob_load_final(ctx, -EINVAL);
    1683           8 :                 return;
    1684             :         }
    1685             : 
    1686        3538 :         if (page->next != SPDK_INVALID_MD_PAGE) {
    1687             :                 struct spdk_blob_md_page *tmp_pages;
    1688         100 :                 uint32_t next_page = page->next;
    1689         100 :                 uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
    1690             : 
    1691             :                 /* Read the next page */
    1692         100 :                 tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
    1693         100 :                 if (tmp_pages == NULL) {
    1694           0 :                         blob_load_final(ctx, -ENOMEM);
    1695           0 :                         return;
    1696             :                 }
    1697         100 :                 ctx->num_pages++;
    1698         100 :                 ctx->pages = tmp_pages;
    1699             : 
    1700         100 :                 bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
    1701             :                                      next_lba,
    1702         100 :                                      bs_byte_to_lba(blob->bs, sizeof(*page)),
    1703             :                                      blob_load_cpl, ctx);
    1704         100 :                 return;
    1705             :         }
    1706             : 
    1707             :         /* Parse the pages */
    1708        3438 :         rc = blob_parse(ctx->pages, ctx->num_pages, blob);
    1709        3438 :         if (rc) {
    1710          12 :                 blob_load_final(ctx, rc);
    1711          12 :                 return;
    1712             :         }
    1713             : 
    1714        3426 :         if (blob->extent_table_found == true) {
    1715             :                 /* If EXTENT_TABLE was found, that means support for it should be enabled. */
    1716        1764 :                 assert(blob->extent_rle_found == false);
    1717        1764 :                 blob->use_extent_table = true;
    1718             :         } else {
    1719             :                 /* If EXTENT_RLE or no extent_* descriptor was found disable support
    1720             :                  * for extent table. No extent_* descriptors means that blob has length of 0
    1721             :                  * and no extent_rle descriptors were persisted for it.
    1722             :                  * EXTENT_TABLE if used, is always present in metadata regardless of length. */
    1723        1662 :                 blob->use_extent_table = false;
    1724             :         }
    1725             : 
    1726             :         /* Check the clear_method stored in metadata vs what may have been passed
    1727             :          * via spdk_bs_open_blob_ext() and update accordingly.
    1728             :          */
    1729        3426 :         blob_update_clear_method(blob);
    1730             : 
    1731        3426 :         spdk_free(ctx->pages);
    1732        3426 :         ctx->pages = NULL;
    1733             : 
    1734        3426 :         if (blob->extent_table_found) {
    1735        1764 :                 blob_load_cpl_extents_cpl(seq, ctx, 0);
    1736             :         } else {
    1737        1662 :                 blob_load_backing_dev(seq, ctx);
    1738             :         }
    1739             : }
    1740             : 
    1741             : /* Load a blob from disk given a blobid */
    1742             : static void
    1743        3466 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    1744             :           spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    1745             : {
    1746             :         struct spdk_blob_load_ctx *ctx;
    1747             :         struct spdk_blob_store *bs;
    1748             :         uint32_t page_num;
    1749             :         uint64_t lba;
    1750             : 
    1751        3466 :         blob_verify_md_op(blob);
    1752             : 
    1753        3466 :         bs = blob->bs;
    1754             : 
    1755        3466 :         ctx = calloc(1, sizeof(*ctx));
    1756        3466 :         if (!ctx) {
    1757           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1758           0 :                 return;
    1759             :         }
    1760             : 
    1761        3466 :         ctx->blob = blob;
    1762        3466 :         ctx->pages = spdk_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, 0);
    1763        3466 :         if (!ctx->pages) {
    1764           0 :                 free(ctx);
    1765           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1766           0 :                 return;
    1767             :         }
    1768        3466 :         ctx->num_pages = 1;
    1769        3466 :         ctx->cb_fn = cb_fn;
    1770        3466 :         ctx->cb_arg = cb_arg;
    1771        3466 :         ctx->seq = seq;
    1772             : 
    1773        3466 :         page_num = bs_blobid_to_page(blob->id);
    1774        3466 :         lba = bs_md_page_to_lba(blob->bs, page_num);
    1775             : 
    1776        3466 :         blob->state = SPDK_BLOB_STATE_LOADING;
    1777             : 
    1778        3466 :         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1779        3466 :                              bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE),
    1780             :                              blob_load_cpl, ctx);
    1781             : }
    1782             : 
    1783             : struct spdk_blob_persist_ctx {
    1784             :         struct spdk_blob                *blob;
    1785             : 
    1786             :         struct spdk_blob_md_page        *pages;
    1787             :         uint32_t                        next_extent_page;
    1788             :         struct spdk_blob_md_page        *extent_page;
    1789             : 
    1790             :         spdk_bs_sequence_t              *seq;
    1791             :         spdk_bs_sequence_cpl            cb_fn;
    1792             :         void                            *cb_arg;
    1793             :         TAILQ_ENTRY(spdk_blob_persist_ctx) link;
    1794             : };
    1795             : 
    1796             : static void
    1797        1262 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
    1798             :                    uint64_t lba_count)
    1799             : {
    1800        1262 :         switch (blob->clear_method) {
    1801        1262 :         case BLOB_CLEAR_WITH_DEFAULT:
    1802             :         case BLOB_CLEAR_WITH_UNMAP:
    1803        1262 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    1804        1262 :                 break;
    1805           0 :         case BLOB_CLEAR_WITH_WRITE_ZEROES:
    1806           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1807           0 :                 break;
    1808           0 :         case BLOB_CLEAR_WITH_NONE:
    1809             :         default:
    1810           0 :                 break;
    1811             :         }
    1812        1262 : }
    1813             : 
    1814             : static int
    1815        1152 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
    1816             : {
    1817             :         uint32_t        crc;
    1818             :         static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
    1819             : 
    1820        1152 :         if (super->version > SPDK_BS_VERSION ||
    1821        1148 :             super->version < SPDK_BS_INITIAL_VERSION) {
    1822           8 :                 return -EILSEQ;
    1823             :         }
    1824             : 
    1825        1144 :         if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    1826             :                    sizeof(super->signature)) != 0) {
    1827           0 :                 return -EILSEQ;
    1828             :         }
    1829             : 
    1830        1144 :         crc = blob_md_page_calc_crc(super);
    1831        1144 :         if (crc != super->crc) {
    1832           4 :                 return -EILSEQ;
    1833             :         }
    1834             : 
    1835        1140 :         if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1836        1126 :                 SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
    1837          14 :         } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1838           6 :                 SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless bstype\n");
    1839             :         } else {
    1840           8 :                 SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
    1841           8 :                 SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1842           8 :                 SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1843           8 :                 return -ENXIO;
    1844             :         }
    1845             : 
    1846        1132 :         if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
    1847           8 :                 SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
    1848             :                                bs->dev->blockcnt * bs->dev->blocklen, super->size);
    1849           8 :                 return -EILSEQ;
    1850             :         }
    1851             : 
    1852        1124 :         return 0;
    1853             : }
    1854             : 
    1855             : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    1856             :                           spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    1857             : 
    1858             : static void
    1859        5092 : blob_persist_complete_cb(void *arg)
    1860             : {
    1861        5092 :         struct spdk_blob_persist_ctx *ctx = arg;
    1862             : 
    1863             :         /* Call user callback */
    1864        5092 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
    1865             : 
    1866             :         /* Free the memory */
    1867        5092 :         spdk_free(ctx->pages);
    1868        5092 :         free(ctx);
    1869        5092 : }
    1870             : 
    1871             : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
    1872             : 
    1873             : static void
    1874        5092 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
    1875             : {
    1876             :         struct spdk_blob_persist_ctx    *next_persist, *tmp;
    1877        5092 :         struct spdk_blob                *blob = ctx->blob;
    1878             : 
    1879        5092 :         if (bserrno == 0) {
    1880        5040 :                 blob_mark_clean(blob);
    1881             :         }
    1882             : 
    1883        5092 :         assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
    1884             : 
    1885             :         /* Complete all persists that were pending when the current persist started */
    1886       10184 :         TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
    1887        5092 :                 TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
    1888        5092 :                 spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
    1889             :         }
    1890             : 
    1891        5092 :         if (TAILQ_EMPTY(&blob->pending_persists)) {
    1892        5069 :                 return;
    1893             :         }
    1894             : 
    1895             :         /* Queue up all pending persists for completion and start blob persist with first one */
    1896          23 :         TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
    1897          23 :         next_persist = TAILQ_FIRST(&blob->persists_to_complete);
    1898             : 
    1899          23 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    1900          23 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
    1901             : }
    1902             : 
    1903             : static void
    1904        5040 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1905             : {
    1906        5040 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1907        5040 :         struct spdk_blob                *blob = ctx->blob;
    1908        5040 :         struct spdk_blob_store          *bs = blob->bs;
    1909             :         size_t                          i;
    1910             : 
    1911        5040 :         if (bserrno != 0) {
    1912           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1913           0 :                 return;
    1914             :         }
    1915             : 
    1916        5040 :         spdk_spin_lock(&bs->used_lock);
    1917             : 
    1918             :         /* Release all extent_pages that were truncated */
    1919        6774 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1920             :                 /* Nothing to release if it was not allocated */
    1921        1734 :                 if (blob->active.extent_pages[i] != 0) {
    1922         626 :                         bs_release_md_page(bs, blob->active.extent_pages[i]);
    1923             :                 }
    1924             :         }
    1925             : 
    1926        5040 :         spdk_spin_unlock(&bs->used_lock);
    1927             : 
    1928        5040 :         if (blob->active.num_extent_pages == 0) {
    1929        3637 :                 free(blob->active.extent_pages);
    1930        3637 :                 blob->active.extent_pages = NULL;
    1931        3637 :                 blob->active.extent_pages_array_size = 0;
    1932        1403 :         } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
    1933             : #ifndef __clang_analyzer__
    1934             :                 void *tmp;
    1935             : 
    1936             :                 /* scan-build really can't figure reallocs, workaround it */
    1937           2 :                 tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
    1938           2 :                 assert(tmp != NULL);
    1939           2 :                 blob->active.extent_pages = tmp;
    1940             : #endif
    1941           2 :                 blob->active.extent_pages_array_size = blob->active.num_extent_pages;
    1942             :         }
    1943             : 
    1944        5040 :         blob_persist_complete(seq, ctx, bserrno);
    1945             : }
    1946             : 
    1947             : static void
    1948        5040 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    1949             : {
    1950        5040 :         struct spdk_blob                *blob = ctx->blob;
    1951        5040 :         struct spdk_blob_store          *bs = blob->bs;
    1952             :         size_t                          i;
    1953             :         uint64_t                        lba;
    1954             :         uint64_t                        lba_count;
    1955             :         spdk_bs_batch_t                 *batch;
    1956             : 
    1957        5040 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
    1958        5040 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    1959             : 
    1960             :         /* Clear all extent_pages that were truncated */
    1961        6774 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1962             :                 /* Nothing to clear if it was not allocated */
    1963        1734 :                 if (blob->active.extent_pages[i] != 0) {
    1964         626 :                         lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
    1965         626 :                         bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1966             :                 }
    1967             :         }
    1968             : 
    1969        5040 :         bs_batch_close(batch);
    1970        5040 : }
    1971             : 
    1972             : static void
    1973        5040 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1974             : {
    1975        5040 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1976        5040 :         struct spdk_blob                *blob = ctx->blob;
    1977        5040 :         struct spdk_blob_store          *bs = blob->bs;
    1978             :         size_t                          i;
    1979             : 
    1980        5040 :         if (bserrno != 0) {
    1981           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1982           0 :                 return;
    1983             :         }
    1984             : 
    1985        5040 :         spdk_spin_lock(&bs->used_lock);
    1986             :         /* Release all clusters that were truncated */
    1987     1074047 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    1988     1069007 :                 uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
    1989             : 
    1990             :                 /* Nothing to release if it was not allocated */
    1991     1069007 :                 if (blob->active.clusters[i] != 0) {
    1992        2343 :                         bs_release_cluster(bs, cluster_num);
    1993             :                 }
    1994             :         }
    1995        5040 :         spdk_spin_unlock(&bs->used_lock);
    1996             : 
    1997        5040 :         if (blob->active.num_clusters == 0) {
    1998        1940 :                 free(blob->active.clusters);
    1999        1940 :                 blob->active.clusters = NULL;
    2000        1940 :                 blob->active.cluster_array_size = 0;
    2001        3100 :         } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
    2002             : #ifndef __clang_analyzer__
    2003             :                 void *tmp;
    2004             : 
    2005             :                 /* scan-build really can't figure reallocs, workaround it */
    2006          14 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
    2007          14 :                 assert(tmp != NULL);
    2008          14 :                 blob->active.clusters = tmp;
    2009             : 
    2010             : #endif
    2011          14 :                 blob->active.cluster_array_size = blob->active.num_clusters;
    2012             :         }
    2013             : 
    2014             :         /* Move on to clearing extent pages */
    2015        5040 :         blob_persist_clear_extents(seq, ctx);
    2016             : }
    2017             : 
    2018             : static void
    2019        5040 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2020             : {
    2021        5040 :         struct spdk_blob                *blob = ctx->blob;
    2022        5040 :         struct spdk_blob_store          *bs = blob->bs;
    2023             :         spdk_bs_batch_t                 *batch;
    2024             :         size_t                          i;
    2025             :         uint64_t                        lba;
    2026             :         uint64_t                        lba_count;
    2027             : 
    2028             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2029             :          * at the end, but no changes ever occur in the middle of the list.
    2030             :          */
    2031             : 
    2032        5040 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
    2033             : 
    2034             :         /* Clear all clusters that were truncated */
    2035        5040 :         lba = 0;
    2036        5040 :         lba_count = 0;
    2037     1074047 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    2038     1069007 :                 uint64_t next_lba = blob->active.clusters[i];
    2039     1069007 :                 uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
    2040             : 
    2041     1069007 :                 if (next_lba > 0 && (lba + lba_count) == next_lba) {
    2042             :                         /* This cluster is contiguous with the previous one. */
    2043        1085 :                         lba_count += next_lba_count;
    2044        1085 :                         continue;
    2045     1067922 :                 } else if (next_lba == 0) {
    2046     1066664 :                         continue;
    2047             :                 }
    2048             : 
    2049             :                 /* This cluster is not contiguous with the previous one. */
    2050             : 
    2051             :                 /* If a run of LBAs previously existing, clear them now */
    2052        1258 :                 if (lba_count > 0) {
    2053          36 :                         bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2054             :                 }
    2055             : 
    2056             :                 /* Start building the next batch */
    2057        1258 :                 lba = next_lba;
    2058        1258 :                 if (next_lba > 0) {
    2059        1258 :                         lba_count = next_lba_count;
    2060             :                 } else {
    2061           0 :                         lba_count = 0;
    2062             :                 }
    2063             :         }
    2064             : 
    2065             :         /* If we ended with a contiguous set of LBAs, clear them now */
    2066        5040 :         if (lba_count > 0) {
    2067        1222 :                 bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2068             :         }
    2069             : 
    2070        5040 :         bs_batch_close(batch);
    2071        5040 : }
    2072             : 
    2073             : static void
    2074        5044 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2075             : {
    2076        5044 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2077        5044 :         struct spdk_blob                *blob = ctx->blob;
    2078        5044 :         struct spdk_blob_store          *bs = blob->bs;
    2079             :         size_t                          i;
    2080             : 
    2081        5044 :         if (bserrno != 0) {
    2082           4 :                 blob_persist_complete(seq, ctx, bserrno);
    2083           4 :                 return;
    2084             :         }
    2085             : 
    2086        5040 :         spdk_spin_lock(&bs->used_lock);
    2087             : 
    2088             :         /* This loop starts at 1 because the first page is special and handled
    2089             :          * below. The pages (except the first) are never written in place,
    2090             :          * so any pages in the clean list must be zeroed.
    2091             :          */
    2092        5108 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2093          68 :                 bs_release_md_page(bs, blob->clean.pages[i]);
    2094             :         }
    2095             : 
    2096        5040 :         if (blob->active.num_pages == 0) {
    2097             :                 uint32_t page_num;
    2098             : 
    2099        1484 :                 page_num = bs_blobid_to_page(blob->id);
    2100        1484 :                 bs_release_md_page(bs, page_num);
    2101             :         }
    2102             : 
    2103        5040 :         spdk_spin_unlock(&bs->used_lock);
    2104             : 
    2105             :         /* Move on to clearing clusters */
    2106        5040 :         blob_persist_clear_clusters(seq, ctx);
    2107             : }
    2108             : 
                       /* Persist step that queues write-zeroes for metadata pages.
                        *
                        * A non-zero bserrno ends the persist through blob_persist_complete().
                        * Otherwise one batch is built on top of seq, with
                        * blob_persist_zero_pages_cpl registered as its completion. The batch
                        * zeroes every page in the clean list except index 0, plus the page
                        * derived from the blob id when active.num_pages == 0.
                        */
    2109             : static void
    2110        5084 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2111             : {
    2112        5084 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2113        5084 :         struct spdk_blob                *blob = ctx->blob;
    2114        5084 :         struct spdk_blob_store          *bs = blob->bs;
    2115             :         uint64_t                        lba;
    2116             :         uint64_t                        lba_count;
    2117             :         spdk_bs_batch_t                 *batch;
    2118             :         size_t                          i;
    2119             : 
    2120        5084 :         if (bserrno != 0) {
    2121          40 :                 blob_persist_complete(seq, ctx, bserrno);
    2122          40 :                 return;
    2123             :         }
    2124             : 
    2125        5044 :         batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
    2126             : 
                               /* lba_count is the length passed to every write-zeroes request below. */
    2127        5044 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    2128             : 
    2129             :         /* This loop starts at 1 because the first page is special and handled
    2130             :          * below. The pages (except the first) are never written in place,
    2131             :          * so any pages in the clean list must be zeroed.
    2132             :          */
    2133        5112 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2134          68 :                 lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
    2135             : 
    2136          68 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2137             :         }
    2138             : 
    2139             :         /* The first page will only be zeroed if this is a delete. */
    2140        5044 :         if (blob->active.num_pages == 0) {
    2141             :                 uint32_t page_num;
    2142             : 
    2143             :                 /* The first page in the metadata goes where the blobid indicates */
    2144        1488 :                 page_num = bs_blobid_to_page(blob->id);
    2145        1488 :                 lba = bs_md_page_to_lba(bs, page_num);
    2146             : 
    2147        1488 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2148             :         }
    2149             : 
    2150        5044 :         bs_batch_close(batch);
    2151             : }
    2152             : 
                       /* Persist step that writes the first metadata page.
                        *
                        * ctx->pages[0] is written to the LBA of the md page derived from the blob
                        * id, with blob_persist_zero_pages as the write completion. A non-zero
                        * bserrno ends the persist. When active.num_pages == 0 no write is issued
                        * and blob_persist_zero_pages() is called directly.
                        */
    2153             : static void
    2154        3596 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2155             : {
    2156        3596 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2157        3596 :         struct spdk_blob                *blob = ctx->blob;
    2158        3596 :         struct spdk_blob_store          *bs = blob->bs;
    2159             :         uint64_t                        lba;
    2160             :         uint32_t                        lba_count;
    2161             :         struct spdk_blob_md_page        *page;
    2162             : 
    2163        3596 :         if (bserrno != 0) {
    2164           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2165           0 :                 return;
    2166             :         }
    2167             : 
    2168        3596 :         if (blob->active.num_pages == 0) {
    2169             :                 /* Move on to the next step */
    2170           0 :                 blob_persist_zero_pages(seq, ctx, 0);
    2171           0 :                 return;
    2172             :         }
    2173             : 
                               /* The write length is the size of one struct spdk_blob_md_page. */
    2174        3596 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2175             : 
    2176        3596 :         page = &ctx->pages[0];
    2177             :         /* The first page in the metadata goes where the blobid indicates */
    2178        3596 :         lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
    2179             : 
    2180        3596 :         bs_sequence_write_dev(seq, page, lba, lba_count,
    2181             :                               blob_persist_zero_pages, ctx);
    2182             : }
    2183             : 
                       /* Persist step that writes every metadata page except the first.
                        *
                        * ctx->pages[1 .. active.num_pages - 1] are queued on one batch, each going
                        * to the LBA of blob->active.pages[i]. blob_persist_write_page_root is
                        * registered as the batch completion.
                        */
    2184             : static void
    2185        3596 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2186             : {
    2187        3596 :         struct spdk_blob                *blob = ctx->blob;
    2188        3596 :         struct spdk_blob_store          *bs = blob->bs;
    2189             :         uint64_t                        lba;
    2190             :         uint32_t                        lba_count;
    2191             :         struct spdk_blob_md_page        *page;
    2192             :         spdk_bs_batch_t                 *batch;
    2193             :         size_t                          i;
    2194             : 
    2195             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2196             :          * at the end, but no changes ever occur in the middle of the list.
    2197             :          */
    2198             : 
    2199        3596 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2200             : 
    2201        3596 :         batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
    2202             : 
    2203             :         /* This starts at 1. The root page is not written until
    2204             :          * all of the others are finished
    2205             :          */
    2206        3684 :         for (i = 1; i < blob->active.num_pages; i++) {
    2207          88 :                 page = &ctx->pages[i];
                                       /* Debug check that the serialized page order matches the index. */
    2208          88 :                 assert(page->sequence_num == i);
    2209             : 
    2210          88 :                 lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
    2211             : 
    2212          88 :                 bs_batch_write_dev(batch, page, lba, lba_count);
    2213             :         }
    2214             : 
    2215        3596 :         bs_batch_close(batch);
    2216        3596 : }
    2217             : 
                       /* Set the number of clusters in the blob's active metadata to sz.
                        *
                        * Returns 0 on success, including when sz already equals
                        * active.num_clusters. Returns -ENOSPC when a blob that is not thin
                        * provisioned would need more clusters or md pages than are currently
                        * free, and -ENOMEM when growing the cluster or extent-page array fails.
                        * On success the blob state is set to SPDK_BLOB_STATE_DIRTY.
                        *
                        * The cluster array only grows here; trailing entries are kept when
                        * shrinking so that a later grow can reuse them.
                        */
    2218             : static int
    2219        3568 : blob_resize(struct spdk_blob *blob, uint64_t sz)
    2220             : {
    2221             :         uint64_t        i;
    2222             :         uint64_t        *tmp;
    2223        3568 :         uint64_t        cluster;
    2224        3568 :         uint32_t        lfmd; /*  lowest free md page */
    2225             :         uint64_t        num_clusters;
    2226             :         uint32_t        *ep_tmp;
    2227        3568 :         uint64_t        new_num_ep = 0, current_num_ep = 0;
    2228             :         struct spdk_blob_store *bs;
    2229             :         int             rc;
    2230             : 
    2231        3568 :         bs = blob->bs;
    2232             : 
    2233        3568 :         blob_verify_md_op(blob);
    2234             : 
    2235        3568 :         if (blob->active.num_clusters == sz) {
    2236         456 :                 return 0;
    2237             :         }
    2238             : 
    2239        3112 :         if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2240             :                 /* If this blob was resized to be larger, then smaller, then
    2241             :                  * larger without syncing, then the cluster array already
    2242             :                  * contains spare assigned clusters we can use.
    2243             :                  */
    2244           0 :                 num_clusters = spdk_min(blob->active.cluster_array_size,
    2245             :                                         sz);
    2246             :         } else {
    2247        3112 :                 num_clusters = blob->active.num_clusters;
    2248             :         }
    2249             : 
    2250        3112 :         if (blob->use_extent_table) {
    2251             :                 /* Round up since every cluster beyond current Extent Table size,
    2252             :                  * requires new extent page. */
    2253        1578 :                 new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
    2254        1578 :                 current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
    2255             :         }
    2256             : 
    2257        3112 :         assert(!spdk_spin_held(&bs->used_lock));
    2258             : 
    2259             :         /* Check first that we have enough clusters and md pages before we start claiming them.
    2260             :          * bs->used_lock is held to ensure that clusters we think are free are still free when we go
    2261             :          * to claim them later in this function.
    2262             :          */
    2263        3112 :         if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
    2264        1302 :                 spdk_spin_lock(&bs->used_lock);
    2265        1302 :                 if ((sz - num_clusters) > bs->num_free_clusters) {
    2266           8 :                         rc = -ENOSPC;
    2267           8 :                         goto out;
    2268             :                 }
    2269        1294 :                 lfmd = 0;
    2270        1938 :                 for (i = current_num_ep; i < new_num_ep ; i++) {
    2271         644 :                         lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
    2272         644 :                         if (lfmd == UINT32_MAX) {
    2273             :                                 /* No more free md pages. Cannot satisfy the request */
    2274           0 :                                 rc = -ENOSPC;
    2275           0 :                                 goto out;
    2276             :                         }
                                               /* Resume the search after the page just found. Nothing is
                                                * claimed in this pass, so without advancing every iteration
                                                * would find the same clear bit again and the loop would only
                                                * prove that a single md page is free.
                                                */
                                               lfmd++;
    2277             :                 }
    2278             :         }
    2279             : 
    2280        3104 :         if (sz > num_clusters) {
    2281             :                 /* Expand the cluster array if necessary.
    2282             :                  * We only shrink the array when persisting.
    2283             :                  */
    2284        1702 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
    2285        1702 :                 if (sz > 0 && tmp == NULL) {
    2286           0 :                         rc = -ENOMEM;
    2287           0 :                         goto out;
    2288             :                 }
                                       /* Zero only the newly added tail of the array. */
    2289        1702 :                 memset(tmp + blob->active.cluster_array_size, 0,
    2290        1702 :                        sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
    2291        1702 :                 blob->active.clusters = tmp;
    2292        1702 :                 blob->active.cluster_array_size = sz;
    2293             : 
    2294             :                 /* Expand the extents table, only if enough clusters were added */
    2295        1702 :                 if (new_num_ep > current_num_ep && blob->use_extent_table) {
    2296         840 :                         ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
    2297         840 :                         if (new_num_ep > 0 && ep_tmp == NULL) {
    2298           0 :                                 rc = -ENOMEM;
    2299           0 :                                 goto out;
    2300             :                         }
    2301         840 :                         memset(ep_tmp + blob->active.extent_pages_array_size, 0,
    2302         840 :                                sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
    2303         840 :                         blob->active.extent_pages = ep_tmp;
    2304         840 :                         blob->active.extent_pages_array_size = new_num_ep;
    2305             :                 }
    2306             :         }
    2307             : 
    2308        3104 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    2309             : 
    2310        3104 :         if (spdk_blob_is_thin_provisioned(blob) == false) {
    2311        2428 :                 cluster = 0;
    2312        2428 :                 lfmd = 0;
    2313        9832 :                 for (i = num_clusters; i < sz; i++) {
                                               /* NOTE(review): any status from bs_allocate_cluster() is not
                                                * examined here; the capacity pre-check above is what guards
                                                * this loop.
                                                */
    2314        7404 :                         bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
    2315             :                         /* Do not increment lfmd here.  lfmd will get updated
    2316             :                          * to the md_page allocated (if any) when a new extent
    2317             :                          * page is needed.  Just pass that value again,
    2318             :                          * bs_allocate_cluster will just start at that index
    2319             :                          * to find the next free md_page when needed.
    2320             :                          */
    2321             :                 }
    2322             :         }
    2323             : 
    2324             :         /* If we are shrinking the blob, we must adjust num_allocated_clusters */
    2325     1072151 :         for (i = sz; i < num_clusters; i++) {
    2326     1069047 :                 if (blob->active.clusters[i] != 0) {
    2327        2343 :                         blob->active.num_allocated_clusters--;
    2328             :                 }
    2329             :         }
    2330             : 
    2331        3104 :         blob->active.num_clusters = sz;
    2332        3104 :         blob->active.num_extent_pages = new_num_ep;
    2333             : 
    2334        3104 :         rc = 0;
    2335        3112 : out:
                               /* used_lock is only taken on the non-thin grow path, hence the held check. */
    2336        3112 :         if (spdk_spin_held(&bs->used_lock)) {
    2337        1302 :                 spdk_spin_unlock(&bs->used_lock);
    2338             :         }
    2339             : 
    2340        3112 :         return rc;
    2341             : }
    2342             : 
                       /* Persist step that serializes the blob and assigns md pages to the result.
                        *
                        * blob_serialize() fills ctx->pages and active.num_pages. The page-index
                        * array is resized to match, and every page after the first gets a free
                        * md page claimed for it. A failure of blob_serialize() or of realloc()
                        * ends the persist with that error. Running out of clear bits in
                        * used_md_pages ends it with -ENOMEM. On success the blob state becomes
                        * SPDK_BLOB_STATE_CLEAN and blob_persist_write_page_chain() is called.
                        */
    2343             : static void
    2344        3596 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
    2345             : {
    2346        3596 :         spdk_bs_sequence_t *seq = ctx->seq;
    2347        3596 :         struct spdk_blob *blob = ctx->blob;
    2348        3596 :         struct spdk_blob_store *bs = blob->bs;
    2349             :         uint64_t i;
    2350             :         uint32_t page_num;
    2351             :         void *tmp;
    2352             :         int rc;
    2353             : 
    2354             :         /* Generate the new metadata */
    2355        3596 :         rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
    2356        3596 :         if (rc < 0) {
    2357           0 :                 blob_persist_complete(seq, ctx, rc);
    2358           0 :                 return;
    2359             :         }
    2360             : 
    2361        3596 :         assert(blob->active.num_pages >= 1);
    2362             : 
    2363             :         /* Resize the cache of page indices */
    2364        3596 :         tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
    2365        3596 :         if (!tmp) {
    2366           0 :                 blob_persist_complete(seq, ctx, -ENOMEM);
    2367           0 :                 return;
    2368             :         }
    2369        3596 :         blob->active.pages = tmp;
    2370             : 
    2371             :         /* Assign this metadata to pages. This requires two passes - one to verify that there are
    2372             :          * enough pages and a second to actually claim them. The used_lock is held across
    2373             :          * both passes to ensure things don't change in the middle.
    2374             :          */
    2375        3596 :         spdk_spin_lock(&bs->used_lock);
    2376        3596 :         page_num = 0;
    2377             :         /* Note that this loop starts at one. The first page location is fixed by the blobid. */
    2378        3684 :         for (i = 1; i < blob->active.num_pages; i++) {
    2379          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2380          88 :                 if (page_num == UINT32_MAX) {
    2381           0 :                         spdk_spin_unlock(&bs->used_lock);
    2382           0 :                         blob_persist_complete(seq, ctx, -ENOMEM);
    2383           0 :                         return;
    2384             :                 }
                                       /* Nothing is claimed in this pass, so step past the bit just found. */
    2385          88 :                 page_num++;
    2386             :         }
    2387             : 
    2388        3596 :         page_num = 0;
    2389        3596 :         blob->active.pages[0] = bs_blobid_to_page(blob->id);
    2390        3684 :         for (i = 1; i < blob->active.num_pages; i++) {
    2391          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
                                       /* Link the previous page to the page chosen for this one. */
    2392          88 :                 ctx->pages[i - 1].next = page_num;
    2393             :                 /* Now that previous metadata page is complete, calculate the crc for it. */
    2394          88 :                 ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2395          88 :                 blob->active.pages[i] = page_num;
    2396          88 :                 bs_claim_md_page(bs, page_num);
    2397          88 :                 SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
    2398             :                               blob->id);
    2399          88 :                 page_num++;
    2400             :         }
    2401        3596 :         spdk_spin_unlock(&bs->used_lock);
                               /* i equals active.num_pages here, so this is the crc of the last page,
                                * which the loop above never computes.
                                */
    2402        3596 :         ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2403             :         /* Start writing the metadata from last page to first */
    2404        3596 :         blob->state = SPDK_BLOB_STATE_CLEAN;
    2405        3596 :         blob_persist_write_page_chain(seq, ctx);
    2406             : }
    2407             : 
                       /* Persist step that writes extent pages, one per invocation.
                        *
                        * ctx->extent_page is freed first if it is set. A non-zero bserrno then ends
                        * the persist. Otherwise active.extent_pages is scanned from
                        * ctx->next_extent_page for the next non-zero entry. That entry is
                        * serialized into a new ctx->extent_page and written with this function as
                        * the write completion. When the scan reaches the end of the array,
                        * blob_persist_generate_new_md() is called.
                        */
    2408             : static void
    2409        2354 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2410             : {
    2411        2354 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2412        2354 :         struct spdk_blob                *blob = ctx->blob;
    2413             :         size_t                          i;
    2414             :         uint32_t                        extent_page_id;
    2415        2354 :         uint32_t                        page_count = 0;
    2416             :         int                             rc;
    2417             : 
    2418        2354 :         if (ctx->extent_page != NULL) {
    2419         666 :                 spdk_free(ctx->extent_page);
    2420         666 :                 ctx->extent_page = NULL;
    2421             :         }
    2422             : 
    2423        2354 :         if (bserrno != 0) {
    2424           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2425           0 :                 return;
    2426             :         }
    2427             : 
    2428             :         /* Only write out Extent Pages when blob was resized. */
    2429        4608 :         for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
    2430        2920 :                 extent_page_id = blob->active.extent_pages[i];
    2431        2920 :                 if (extent_page_id == 0) {
    2432             :                         /* No Extent Page to persist */
    2433        2254 :                         assert(spdk_blob_is_thin_provisioned(blob));
    2434        2254 :                         continue;
    2435             :                 }
    2436         666 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
                                       /* Record where the scan resumes when the write below completes. */
    2437         666 :                 ctx->next_extent_page = i + 1;
    2438         666 :                 rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
    2439         666 :                 if (rc < 0) {
    2440           0 :                         blob_persist_complete(seq, ctx, rc);
    2441           0 :                         return;
    2442             :                 }
    2443             : 
    2444         666 :                 blob->state = SPDK_BLOB_STATE_DIRTY;
    2445         666 :                 blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
    2446             : 
    2447         666 :                 ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
    2448             : 
    2449         666 :                 bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
    2450         666 :                                       bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    2451             :                                       blob_persist_write_extent_pages, ctx);
    2452         666 :                 return;
    2453             :         }
    2454             : 
    2455        1688 :         blob_persist_generate_new_md(ctx);
    2456             : }
    2457             : 
                       /* First persist step; blob_persist() passes it to bs_mark_dirty() as the
                        * completion callback.
                        *
                        * A non-zero bserrno ends the persist. active.num_pages == 0 goes straight
                        * to blob_persist_zero_pages(). A change in size sets ctx->next_extent_page
                        * and continues with blob_persist_write_extent_pages(). Otherwise
                        * blob_persist_generate_new_md() is called directly.
                        */
    2458             : static void
    2459        5092 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2460             : {
    2461        5092 :         struct spdk_blob_persist_ctx *ctx = cb_arg;
    2462        5092 :         struct spdk_blob *blob = ctx->blob;
    2463             : 
    2464        5092 :         if (bserrno != 0) {
    2465           8 :                 blob_persist_complete(seq, ctx, bserrno);
    2466           8 :                 return;
    2467             :         }
    2468             : 
    2469        5084 :         if (blob->active.num_pages == 0) {
    2470             :                 /* This is the signal that the blob should be deleted.
    2471             :                  * Immediately jump to the clean up routine. */
    2472        1488 :                 assert(blob->clean.num_pages > 0);
    2473        1488 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
    2474        1488 :                 blob_persist_zero_pages(seq, ctx, 0);
    2475        1488 :                 return;
    2476             : 
    2477             :         }
    2478             : 
                               /* In both resize branches spdk_max(1, n) - 1 yields n - 1, clamped to 0
                                * when n == 0, where n is the smaller of the two extent page counts.
                                */
    2479        3596 :         if (blob->clean.num_clusters < blob->active.num_clusters) {
    2480             :                 /* Blob was resized up */
    2481        1674 :                 assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
    2482        1674 :                 ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
    2483        1922 :         } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2484             :                 /* Blob was resized down */
    2485          14 :                 assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
    2486          14 :                 ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
    2487             :         } else {
    2488             :                 /* No change in size occurred */
    2489        1908 :                 blob_persist_generate_new_md(ctx);
    2490        1908 :                 return;
    2491             :         }
    2492             : 
    2493        1688 :         blob_persist_write_extent_pages(seq, ctx, 0);
    2494             : }
    2495             : 
                       /* Context carried from bs_mark_dirty() through bs_mark_dirty_write() to
                        * bs_mark_dirty_write_cpl().
                        *
                        * super is the buffer bs_mark_dirty() allocates with spdk_zmalloc() and
                        * bs_mark_dirty_write_cpl() releases with spdk_free(). cb_fn and cb_arg are
                        * the caller's completion, invoked by bs_mark_dirty_write_cpl().
                        */
    2496             : struct spdk_bs_mark_dirty {
    2497             :         struct spdk_blob_store          *bs;
    2498             :         struct spdk_bs_super_block      *super;
    2499             :         spdk_bs_sequence_cpl            cb_fn;
    2500             :         void                            *cb_arg;
    2501             : };
    2502             : 
                       /* Final step of the mark-dirty sequence.
                        *
                        * Sets bs->clean to 0 when bserrno is 0, calls the stored callback with
                        * bserrno, then frees the super block buffer and the context.
                        */
    2503             : static void
    2504         158 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2505             : {
    2506         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2507             : 
    2508         158 :         if (bserrno == 0) {
    2509         150 :                 ctx->bs->clean = 0;
    2510             :         }
    2511             : 
                               /* The callback runs before the context is released. */
    2512         158 :         ctx->cb_fn(seq, ctx->cb_arg, bserrno);
    2513             : 
    2514         158 :         spdk_free(ctx->super);
    2515         158 :         free(ctx);
    2516         158 : }
    2517             : 
    2518             : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2519             :                            struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    2520             : 
    2521             : 
                       /* Read completion registered by bs_mark_dirty().
                        *
                        * Validates ctx->super with bs_super_validate(), clears its clean flag,
                        * fills in size from the device block count and block length when size is
                        * 0, and writes it back through bs_write_super(). A read error or a
                        * validation error is forwarded to bs_mark_dirty_write_cpl().
                        */
    2522             : static void
    2523         158 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2524             : {
    2525         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2526             :         int rc;
    2527             : 
    2528         158 :         if (bserrno != 0) {
    2529           4 :                 bs_mark_dirty_write_cpl(seq, ctx, bserrno);
    2530           4 :                 return;
    2531             :         }
    2532             : 
    2533         154 :         rc = bs_super_validate(ctx->super, ctx->bs);
    2534         154 :         if (rc != 0) {
    2535           0 :                 bs_mark_dirty_write_cpl(seq, ctx, rc);
    2536           0 :                 return;
    2537             :         }
    2538             : 
    2539         154 :         ctx->super->clean = 0;
    2540         154 :         if (ctx->super->size == 0) {
    2541           4 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    2542             :         }
    2543             : 
    2544         154 :         bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
    2545             : }
    2546             : 
                       /* Mark the blobstore dirty, then call cb_fn.
                        *
                        * When bs->clean is already 0, cb_fn is called immediately with 0.
                        * Otherwise a context and a super block buffer are allocated; cb_fn gets
                        * -ENOMEM if either allocation fails. A read into the buffer from the LBA
                        * of page 0 is then started with bs_mark_dirty_write as its completion.
                        */
    2547             : static void
    2548        5526 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2549             :               spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2550             : {
    2551             :         struct spdk_bs_mark_dirty *ctx;
    2552             : 
    2553             :         /* Blobstore is already marked dirty */
    2554        5526 :         if (bs->clean == 0) {
    2555        5368 :                 cb_fn(seq, cb_arg, 0);
    2556        5368 :                 return;
    2557             :         }
    2558             : 
    2559         158 :         ctx = calloc(1, sizeof(*ctx));
    2560         158 :         if (!ctx) {
    2561           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2562           0 :                 return;
    2563             :         }
    2564         158 :         ctx->bs = bs;
    2565         158 :         ctx->cb_fn = cb_fn;
    2566         158 :         ctx->cb_arg = cb_arg;
    2567             : 
    2568         158 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    2569             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2570         158 :         if (!ctx->super) {
    2571           0 :                 free(ctx);
    2572           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2573           0 :                 return;
    2574             :         }
    2575             : 
    2576         158 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    2577         158 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    2578             :                              bs_mark_dirty_write, ctx);
    2579             : }
    2580             : 
    2581             : /* Write a blob to disk.
                        *
                        * When the blob state is SPDK_BLOB_STATE_CLEAN and persists_to_complete is
                        * empty, cb_fn is called immediately with 0. Otherwise a persist context is
                        * allocated; cb_fn gets -ENOMEM if that fails. The context is appended to
                        * pending_persists when persists_to_complete is not empty. If the list is
                        * empty, the context is placed on it and the persist is started through
                        * bs_mark_dirty() with blob_persist_start as the completion.
                        */
    2582             : static void
    2583        9104 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    2584             :              spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2585             : {
    2586             :         struct spdk_blob_persist_ctx *ctx;
    2587             : 
    2588        9104 :         blob_verify_md_op(blob);
    2589             : 
    2590        9104 :         if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
    2591        4012 :                 cb_fn(seq, cb_arg, 0);
    2592        4012 :                 return;
    2593             :         }
    2594             : 
    2595        5092 :         ctx = calloc(1, sizeof(*ctx));
    2596        5092 :         if (!ctx) {
    2597           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2598           0 :                 return;
    2599             :         }
    2600        5092 :         ctx->blob = blob;
    2601        5092 :         ctx->seq = seq;
    2602        5092 :         ctx->cb_fn = cb_fn;
    2603        5092 :         ctx->cb_arg = cb_arg;
    2604             : 
    2605             :         /* Multiple blob persists can affect one another, via blob->state or
    2606             :          * blob mutable data changes. To prevent it, queue up the persists. */
    2607        5092 :         if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
    2608          23 :                 TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
    2609          23 :                 return;
    2610             :         }
    2611        5069 :         TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
    2612             : 
    2613        5069 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
    2614             : }
    2615             : 
                       /* Context used by blob_allocate_and_copy_cluster_cpl() and the
                        * blob_insert_cluster_* helpers.
                        *
                        * blob_allocate_and_copy_cluster_cpl() releases buf with spdk_free().
                        * blob_insert_cluster_revert() releases new_cluster, and also
                        * new_extent_page when it is non-zero. The remaining fields are consumed
                        * outside the code visible here - TODO confirm their roles.
                        */
    2616             : struct spdk_blob_copy_cluster_ctx {
    2617             :         struct spdk_blob *blob;
    2618             :         uint8_t *buf;
    2619             :         uint64_t page;
    2620             :         uint64_t new_cluster;
    2621             :         uint32_t new_extent_page;
    2622             :         spdk_bs_sequence_t *seq;
    2623             :         struct spdk_blob_md_page *new_cluster_page;
    2624             : };
    2625             : 
                       /* Context passed to blob_free_cluster_cpl(), which finishes seq and frees
                        * the context. The other fields are consumed outside the code visible
                        * here - TODO confirm their roles.
                        */
    2626             : struct spdk_blob_free_cluster_ctx {
    2627             :         struct spdk_blob *blob;
    2628             :         uint64_t page;
    2629             :         struct spdk_blob_md_page *md_page;
    2630             :         uint64_t cluster_num;
    2631             :         uint32_t extent_page;
    2632             :         spdk_bs_sequence_t *seq;
    2633             : };
    2634             : 
                       /* Completion that drains the channel's need_cluster_alloc queue.
                        *
                        * Every queued user op is executed when bserrno is 0 and aborted with
                        * bserrno otherwise. ctx->buf and ctx are freed at the end.
                        */
    2635             : static void
    2636         812 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
    2637             : {
    2638         812 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2639         812 :         struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
    2640         812 :         TAILQ_HEAD(, spdk_bs_request_set) requests;
    2641             :         spdk_bs_user_op_t *op;
    2642             : 
                               /* Take the whole queue onto a local list; the channel's list is left
                                * empty before any op is run.
                                */
    2643         812 :         TAILQ_INIT(&requests);
    2644         812 :         TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
    2645             : 
    2646        1624 :         while (!TAILQ_EMPTY(&requests)) {
    2647         812 :                 op = TAILQ_FIRST(&requests);
    2648         812 :                 TAILQ_REMOVE(&requests, op, link);
    2649         812 :                 if (bserrno == 0) {
    2650         812 :                         bs_user_op_execute(op);
    2651             :                 } else {
    2652           0 :                         bs_user_op_abort(op, bserrno);
    2653             :                 }
    2654             :         }
    2655             : 
    2656         812 :         spdk_free(ctx->buf);
    2657         812 :         free(ctx);
    2658         812 : }
    2659             : 
    2660             : static void
    2661          60 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
    2662             : {
    2663          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    2664          60 :         spdk_bs_sequence_t *seq = ctx->seq;
    2665             : 
    2666          60 :         bs_sequence_finish(seq, bserrno);
    2667             : 
    2668          60 :         free(ctx);
    2669          60 : }
    2670             : 
    2671             : static void
    2672           4 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
    2673             : {
    2674           4 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    2675           4 :         bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
    2676           4 :         if (ctx->new_extent_page != 0) {
    2677           2 :                 bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
    2678             :         }
    2679           4 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    2680           4 : }
    2681             : 
    2682             : static void
    2683           4 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
    2684             : {
    2685           4 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2686             : 
    2687           4 :         if (bserrno) {
    2688           0 :                 SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
    2689             :         }
    2690             : 
    2691           4 :         blob_insert_cluster_revert(ctx);
    2692           4 :         bs_sequence_finish(ctx->seq, bserrno);
    2693           4 : }
    2694             : 
    2695             : static void
    2696           4 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
    2697             : {
    2698           4 :         struct spdk_bs_cpl cpl;
    2699             :         spdk_bs_batch_t *batch;
    2700           4 :         struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
    2701             : 
    2702             :         /*
    2703             :          * We allocated a cluster and we copied data to it. But now, we realized that we don't need
    2704             :          * this cluster and we want to release it. We must ensure that we clear the data on this
    2705             :          * cluster.
    2706             :          * The cluster may later be re-allocated by a thick-provisioned blob for example. When
    2707             :          * reading from this thick-provisioned blob before writing data, we should read zeroes.
    2708             :          */
    2709             : 
    2710           4 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2711           4 :         cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
    2712           4 :         cpl.u.blob_basic.cb_arg = ctx;
    2713             : 
    2714           4 :         batch = bs_batch_open(ch, &cpl, ctx->blob);
    2715           4 :         if (!batch) {
    2716           0 :                 blob_insert_cluster_clear_cpl(ctx, -ENOMEM);
    2717           0 :                 return;
    2718             :         }
    2719             : 
    2720           4 :         bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2721           4 :                            bs_cluster_to_lba(ctx->blob->bs, 1));
    2722           4 :         bs_batch_close(batch);
    2723             : }
    2724             : 
    2725             : static void
    2726         812 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
    2727             : {
    2728         812 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2729             : 
    2730         812 :         if (bserrno) {
    2731           4 :                 if (bserrno == -EEXIST) {
    2732             :                         /* The metadata insert failed because another thread
    2733             :                          * allocated the cluster first. Clear and free our cluster
    2734             :                          * but continue without error. */
    2735           4 :                         blob_insert_cluster_clear(ctx);
    2736           4 :                         return;
    2737             :                 }
    2738             : 
    2739           0 :                 blob_insert_cluster_revert(ctx);
    2740             :         }
    2741             : 
    2742         808 :         bs_sequence_finish(ctx->seq, bserrno);
    2743             : }
    2744             : 
    2745             : static void
    2746         408 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2747             : {
    2748         408 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2749             :         uint32_t cluster_number;
    2750             : 
    2751         408 :         if (bserrno) {
    2752             :                 /* The write failed, so jump to the final completion handler */
    2753           0 :                 bs_sequence_finish(seq, bserrno);
    2754           0 :                 return;
    2755             :         }
    2756             : 
    2757         408 :         cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);
    2758             : 
    2759         408 :         blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2760             :                                          ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2761             : }
    2762             : 
    2763             : static void
    2764         280 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2765             : {
    2766         280 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2767             : 
    2768         280 :         if (bserrno != 0) {
    2769             :                 /* The read failed, so jump to the final completion handler */
    2770           0 :                 bs_sequence_finish(seq, bserrno);
    2771           0 :                 return;
    2772             :         }
    2773             : 
    2774             :         /* Write whole cluster */
    2775         280 :         bs_sequence_write_dev(seq, ctx->buf,
    2776         280 :                               bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2777         280 :                               bs_cluster_to_lba(ctx->blob->bs, 1),
    2778             :                               blob_write_copy_cpl, ctx);
    2779             : }
    2780             : 
    2781             : static bool
    2782         796 : blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *base_lba)
    2783             : {
    2784         796 :         uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
    2785             : 
    2786        1146 :         return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
    2787         350 :                blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
    2788             : }
    2789             : 
    2790             : static void
    2791         128 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
    2792             : {
    2793         128 :         struct spdk_blob *blob = ctx->blob;
    2794         128 :         uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
    2795             : 
    2796         128 :         bs_sequence_copy_dev(ctx->seq,
    2797         128 :                              bs_cluster_to_lba(blob->bs, ctx->new_cluster),
    2798             :                              src_lba,
    2799             :                              lba_count,
    2800             :                              blob_write_copy_cpl, ctx);
    2801         128 : }
    2802             : 
    2803             : static void
    2804         812 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
    2805             :                              struct spdk_io_channel *_ch,
    2806             :                              uint64_t io_unit, spdk_bs_user_op_t *op)
    2807             : {
    2808         812 :         struct spdk_bs_cpl cpl;
    2809             :         struct spdk_bs_channel *ch;
    2810             :         struct spdk_blob_copy_cluster_ctx *ctx;
    2811             :         uint32_t cluster_start_page;
    2812             :         uint32_t cluster_number;
    2813             :         bool is_zeroes;
    2814             :         bool can_copy;
    2815             :         bool is_valid_range;
    2816         812 :         uint64_t copy_src_lba;
    2817             :         int rc;
    2818             : 
    2819         812 :         ch = spdk_io_channel_get_ctx(_ch);
    2820             : 
    2821         812 :         if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
    2822             :                 /* There are already operations pending. Queue this user op
    2823             :                  * and return because it will be re-executed when the outstanding
    2824             :                  * cluster allocation completes. */
    2825           0 :                 TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2826           0 :                 return;
    2827             :         }
    2828             : 
    2829             :         /* Round the io_unit offset down to the first page in the cluster */
    2830         812 :         cluster_start_page = bs_io_unit_to_cluster_start(blob, io_unit);
    2831             : 
    2832             :         /* Calculate which index in the metadata cluster array the corresponding
    2833             :          * cluster is supposed to be at. */
    2834         812 :         cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
    2835             : 
    2836         812 :         ctx = calloc(1, sizeof(*ctx));
    2837         812 :         if (!ctx) {
    2838           0 :                 bs_user_op_abort(op, -ENOMEM);
    2839           0 :                 return;
    2840             :         }
    2841             : 
    2842         812 :         assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
    2843             : 
    2844         812 :         ctx->blob = blob;
    2845         812 :         ctx->page = cluster_start_page;
    2846         812 :         ctx->new_cluster_page = ch->new_cluster_page;
    2847         812 :         memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
    2848             : 
    2849             :         /* Check if the cluster that we intend to do CoW for is valid for
    2850             :          * the backing dev. For zeroes backing dev, it'll be always valid.
    2851             :          * For other backing dev e.g. a snapshot, it could be invalid if
    2852             :          * the blob has been resized after snapshot was taken. */
    2853         812 :         is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
    2854             :                          bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2855         812 :                          bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2856             : 
    2857         812 :         can_copy = is_valid_range && blob_can_copy(blob, cluster_start_page, &copy_src_lba);
    2858             : 
    2859        1608 :         is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
    2860             :                         bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2861         796 :                         bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2862         812 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) {
    2863         280 :                 ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
    2864             :                                        NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2865         280 :                 if (!ctx->buf) {
    2866           0 :                         SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
    2867             :                                     blob->bs->cluster_sz);
    2868           0 :                         free(ctx);
    2869           0 :                         bs_user_op_abort(op, -ENOMEM);
    2870           0 :                         return;
    2871             :                 }
    2872             :         }
    2873             : 
    2874         812 :         spdk_spin_lock(&blob->bs->used_lock);
    2875         812 :         rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
    2876             :                                  false);
    2877         812 :         spdk_spin_unlock(&blob->bs->used_lock);
    2878         812 :         if (rc != 0) {
    2879           0 :                 spdk_free(ctx->buf);
    2880           0 :                 free(ctx);
    2881           0 :                 bs_user_op_abort(op, rc);
    2882           0 :                 return;
    2883             :         }
    2884             : 
    2885         812 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2886         812 :         cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
    2887         812 :         cpl.u.blob_basic.cb_arg = ctx;
    2888             : 
    2889         812 :         ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
    2890         812 :         if (!ctx->seq) {
    2891           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    2892           0 :                 bs_release_cluster(blob->bs, ctx->new_cluster);
    2893           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    2894           0 :                 spdk_free(ctx->buf);
    2895           0 :                 free(ctx);
    2896           0 :                 bs_user_op_abort(op, -ENOMEM);
    2897           0 :                 return;
    2898             :         }
    2899             : 
    2900             :         /* Queue the user op to block other incoming operations */
    2901         812 :         TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2902             : 
    2903         812 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
    2904         408 :                 if (can_copy) {
    2905         128 :                         blob_copy(ctx, op, copy_src_lba);
    2906             :                 } else {
    2907             :                         /* Read cluster from backing device */
    2908         280 :                         bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
    2909             :                                                 bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2910         280 :                                                 bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
    2911             :                                                 blob_write_copy, ctx);
    2912             :                 }
    2913             : 
    2914             :         } else {
    2915         404 :                 blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2916             :                                                  ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2917             :         }
    2918             : }
    2919             : 
    2920             : static inline bool
    2921       40206 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
    2922             :                                  uint64_t *lba, uint64_t *lba_count)
    2923             : {
    2924       40206 :         *lba_count = length;
    2925             : 
    2926       40206 :         if (!bs_io_unit_is_allocated(blob, io_unit)) {
    2927        2992 :                 assert(blob->back_bs_dev != NULL);
    2928        2992 :                 *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
    2929        2992 :                 *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
    2930        2992 :                 return false;
    2931             :         } else {
    2932       37214 :                 *lba = bs_blob_io_unit_to_lba(blob, io_unit);
    2933       37214 :                 return true;
    2934             :         }
    2935             : }
    2936             : 
    2937             : struct op_split_ctx {
    2938             :         struct spdk_blob *blob;
    2939             :         struct spdk_io_channel *channel;
    2940             :         uint64_t io_unit_offset;
    2941             :         uint64_t io_units_remaining;
    2942             :         void *curr_payload;
    2943             :         enum spdk_blob_op_type op_type;
    2944             :         spdk_bs_sequence_t *seq;
    2945             :         bool in_submit_ctx;
    2946             :         bool completed_in_submit_ctx;
    2947             :         bool done;
    2948             : };
    2949             : 
    2950             : static void
    2951         774 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
    2952             : {
    2953         774 :         struct op_split_ctx     *ctx = cb_arg;
    2954         774 :         struct spdk_blob        *blob = ctx->blob;
    2955         774 :         struct spdk_io_channel  *ch = ctx->channel;
    2956         774 :         enum spdk_blob_op_type  op_type = ctx->op_type;
    2957             :         uint8_t                 *buf;
    2958             :         uint64_t                offset;
    2959             :         uint64_t                length;
    2960             :         uint64_t                op_length;
    2961             : 
    2962         774 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    2963         178 :                 bs_sequence_finish(ctx->seq, bserrno);
    2964         178 :                 if (ctx->in_submit_ctx) {
    2965             :                         /* Defer freeing of the ctx object, since it will be
    2966             :                          * accessed when this unwinds back to the submisison
    2967             :                          * context.
    2968             :                          */
    2969          40 :                         ctx->done = true;
    2970             :                 } else {
    2971         138 :                         free(ctx);
    2972             :                 }
    2973         178 :                 return;
    2974             :         }
    2975             : 
    2976         596 :         if (ctx->in_submit_ctx) {
    2977             :                 /* If this split operation completed in the context
    2978             :                  * of its submission, mark the flag and return immediately
    2979             :                  * to avoid recursion.
    2980             :                  */
    2981          68 :                 ctx->completed_in_submit_ctx = true;
    2982          68 :                 return;
    2983             :         }
    2984             : 
    2985             :         while (true) {
    2986         596 :                 ctx->completed_in_submit_ctx = false;
    2987             : 
    2988         596 :                 offset = ctx->io_unit_offset;
    2989         596 :                 length = ctx->io_units_remaining;
    2990         596 :                 buf = ctx->curr_payload;
    2991         596 :                 op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
    2992             :                                      offset));
    2993             : 
    2994             :                 /* Update length and payload for next operation */
    2995         596 :                 ctx->io_units_remaining -= op_length;
    2996         596 :                 ctx->io_unit_offset += op_length;
    2997         596 :                 if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
    2998         528 :                         ctx->curr_payload += op_length * blob->bs->io_unit_size;
    2999             :                 }
    3000             : 
    3001         596 :                 assert(!ctx->in_submit_ctx);
    3002         596 :                 ctx->in_submit_ctx = true;
    3003             : 
    3004         596 :                 switch (op_type) {
    3005         418 :                 case SPDK_BLOB_READ:
    3006         418 :                         spdk_blob_io_read(blob, ch, buf, offset, op_length,
    3007             :                                           blob_request_submit_op_split_next, ctx);
    3008         418 :                         break;
    3009         110 :                 case SPDK_BLOB_WRITE:
    3010         110 :                         spdk_blob_io_write(blob, ch, buf, offset, op_length,
    3011             :                                            blob_request_submit_op_split_next, ctx);
    3012         110 :                         break;
    3013          36 :                 case SPDK_BLOB_UNMAP:
    3014          36 :                         spdk_blob_io_unmap(blob, ch, offset, op_length,
    3015             :                                            blob_request_submit_op_split_next, ctx);
    3016          36 :                         break;
    3017          32 :                 case SPDK_BLOB_WRITE_ZEROES:
    3018          32 :                         spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
    3019             :                                                   blob_request_submit_op_split_next, ctx);
    3020          32 :                         break;
    3021           0 :                 case SPDK_BLOB_READV:
    3022             :                 case SPDK_BLOB_WRITEV:
    3023           0 :                         SPDK_ERRLOG("readv/write not valid\n");
    3024           0 :                         bs_sequence_finish(ctx->seq, -EINVAL);
    3025           0 :                         free(ctx);
    3026           0 :                         return;
    3027             :                 }
    3028             : 
    3029             : #ifndef __clang_analyzer__
    3030             :                 /* scan-build reports a false positive around accessing the ctx here. It
    3031             :                  * forms a path that recursively calls this function, but then says
    3032             :                  * "assuming ctx->in_submit_ctx is false", when that isn't possible.
    3033             :                  * This path does free(ctx), returns to here, and reports a use-after-free
    3034             :                  * bug.  Wrapping this bit of code so that scan-build doesn't see it
    3035             :                  * works around the scan-build bug.
    3036             :                  */
    3037         596 :                 assert(ctx->in_submit_ctx);
    3038         596 :                 ctx->in_submit_ctx = false;
    3039             : 
    3040             :                 /* If the operation completed immediately, loop back and submit the
    3041             :                  * next operation.  Otherwise we can return and the next split
    3042             :                  * operation will get submitted when this current operation is
    3043             :                  * later completed asynchronously.
    3044             :                  */
    3045         596 :                 if (ctx->completed_in_submit_ctx) {
    3046          68 :                         continue;
    3047         528 :                 } else if (ctx->done) {
    3048          40 :                         free(ctx);
    3049             :                 }
    3050             : #endif
    3051         528 :                 break;
    3052             :         }
    3053             : }
    3054             : 
    3055             : static void
    3056         178 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
    3057             :                              void *payload, uint64_t offset, uint64_t length,
    3058             :                              spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3059             : {
    3060             :         struct op_split_ctx *ctx;
    3061             :         spdk_bs_sequence_t *seq;
    3062         178 :         struct spdk_bs_cpl cpl;
    3063             : 
    3064         178 :         assert(blob != NULL);
    3065             : 
    3066         178 :         ctx = calloc(1, sizeof(struct op_split_ctx));
    3067         178 :         if (ctx == NULL) {
    3068           0 :                 cb_fn(cb_arg, -ENOMEM);
    3069           0 :                 return;
    3070             :         }
    3071             : 
    3072         178 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3073         178 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3074         178 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3075             : 
    3076         178 :         seq = bs_sequence_start_blob(ch, &cpl, blob);
    3077         178 :         if (!seq) {
    3078           0 :                 free(ctx);
    3079           0 :                 cb_fn(cb_arg, -ENOMEM);
    3080           0 :                 return;
    3081             :         }
    3082             : 
    3083         178 :         ctx->blob = blob;
    3084         178 :         ctx->channel = ch;
    3085         178 :         ctx->curr_payload = payload;
    3086         178 :         ctx->io_unit_offset = offset;
    3087         178 :         ctx->io_units_remaining = length;
    3088         178 :         ctx->op_type = op_type;
    3089         178 :         ctx->seq = seq;
    3090             : 
    3091         178 :         blob_request_submit_op_split_next(ctx, 0);
    3092             : }
    3093             : 
    3094             : static void
    3095          60 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
    3096             : {
    3097          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    3098             : 
    3099          60 :         if (bserrno) {
    3100           0 :                 bs_sequence_finish(ctx->seq, bserrno);
    3101           0 :                 free(ctx);
    3102           0 :                 return;
    3103             :         }
    3104             : 
    3105          60 :         blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
    3106             :                                        ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx);
    3107             : }
    3108             : 
    3109             : static void
    3110       37834 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
    3111             :                               void *payload, uint64_t offset, uint64_t length,
    3112             :                               spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3113             : {
    3114       37834 :         struct spdk_bs_cpl cpl;
    3115       37834 :         uint64_t lba;
    3116       37834 :         uint64_t lba_count;
    3117             :         bool is_allocated;
    3118             : 
    3119       37834 :         assert(blob != NULL);
    3120             : 
    3121       37834 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3122       37834 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3123       37834 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3124             : 
    3125       37834 :         if (blob->frozen_refcnt) {
    3126             :                 /* This blob I/O is frozen */
    3127             :                 spdk_bs_user_op_t *op;
    3128           4 :                 struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3129             : 
    3130           4 :                 op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3131           4 :                 if (!op) {
    3132           0 :                         cb_fn(cb_arg, -ENOMEM);
    3133           0 :                         return;
    3134             :                 }
    3135             : 
    3136           4 :                 TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3137             : 
    3138           4 :                 return;
    3139             :         }
    3140             : 
    3141       37830 :         is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3142             : 
    3143       37830 :         switch (op_type) {
    3144       16887 :         case SPDK_BLOB_READ: {
    3145             :                 spdk_bs_batch_t *batch;
    3146             : 
    3147       16887 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3148       16887 :                 if (!batch) {
    3149           0 :                         cb_fn(cb_arg, -ENOMEM);
    3150           0 :                         return;
    3151             :                 }
    3152             : 
    3153       16887 :                 if (is_allocated) {
    3154             :                         /* Read from the blob */
    3155       15799 :                         bs_batch_read_dev(batch, payload, lba, lba_count);
    3156             :                 } else {
    3157             :                         /* Read from the backing block device */
    3158        1088 :                         bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
    3159             :                 }
    3160             : 
    3161       16887 :                 bs_batch_close(batch);
    3162       16887 :                 break;
    3163             :         }
    3164       20851 :         case SPDK_BLOB_WRITE:
    3165             :         case SPDK_BLOB_WRITE_ZEROES: {
    3166       20851 :                 if (is_allocated) {
    3167             :                         /* Write to the blob */
    3168             :                         spdk_bs_batch_t *batch;
    3169             : 
    3170       20507 :                         if (lba_count == 0) {
    3171           0 :                                 cb_fn(cb_arg, 0);
    3172           0 :                                 return;
    3173             :                         }
    3174             : 
    3175       20507 :                         batch = bs_batch_open(_ch, &cpl, blob);
    3176       20507 :                         if (!batch) {
    3177           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3178           0 :                                 return;
    3179             :                         }
    3180             : 
    3181       20507 :                         if (op_type == SPDK_BLOB_WRITE) {
    3182       20475 :                                 bs_batch_write_dev(batch, payload, lba, lba_count);
    3183             :                         } else {
    3184          32 :                                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    3185             :                         }
    3186             : 
    3187       20507 :                         bs_batch_close(batch);
    3188             :                 } else {
    3189             :                         /* Queue this operation and allocate the cluster */
    3190             :                         spdk_bs_user_op_t *op;
    3191             : 
    3192         344 :                         op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3193         344 :                         if (!op) {
    3194           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3195           0 :                                 return;
    3196             :                         }
    3197             : 
    3198         344 :                         bs_allocate_and_copy_cluster(blob, _ch, offset, op);
    3199             :                 }
    3200       20851 :                 break;
    3201             :         }
    3202          92 :         case SPDK_BLOB_UNMAP: {
    3203          92 :                 struct spdk_blob_free_cluster_ctx *ctx = NULL;
    3204             :                 spdk_bs_batch_t *batch;
    3205             : 
    3206             :                 /* if aligned with cluster release cluster */
    3207         160 :                 if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
    3208          68 :                     bs_io_units_per_cluster(blob) == length) {
    3209          60 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3210             :                         uint32_t cluster_start_page;
    3211             :                         uint32_t cluster_number;
    3212             : 
    3213          60 :                         assert(offset % bs_io_units_per_cluster(blob) == 0);
    3214             : 
    3215             :                         /* Round the io_unit offset down to the first page in the cluster */
    3216          60 :                         cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
    3217             : 
    3218             :                         /* Calculate which index in the metadata cluster array the corresponding
    3219             :                          * cluster is supposed to be at. */
    3220          60 :                         cluster_number = bs_io_unit_to_cluster_number(blob, offset);
    3221             : 
    3222          60 :                         ctx = calloc(1, sizeof(*ctx));
    3223          60 :                         if (!ctx) {
    3224           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3225           0 :                                 return;
    3226             :                         }
    3227             :                         /* When freeing a cluster the flow should be (in order):
    3228             :                          * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
    3229             :                          * old data)
    3230             :                          * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
    3231             :                          * cluster), update and sync metadata freeing the cluster
    3232             :                          * 3. Once metadata update is done, complete the user unmap request
    3233             :                          */
    3234          60 :                         ctx->blob = blob;
    3235          60 :                         ctx->page = cluster_start_page;
    3236          60 :                         ctx->cluster_num = cluster_number;
    3237          60 :                         ctx->md_page = bs_channel->new_cluster_page;
    3238          60 :                         ctx->seq = bs_sequence_start_bs(_ch, &cpl);
    3239          60 :                         if (!ctx->seq) {
    3240           0 :                                 free(ctx);
    3241           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3242           0 :                                 return;
    3243             :                         }
    3244             : 
    3245          60 :                         if (blob->use_extent_table) {
    3246          30 :                                 ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
    3247             :                         }
    3248             : 
    3249          60 :                         cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
    3250          60 :                         cpl.u.blob_basic.cb_arg = ctx;
    3251             :                 }
    3252             : 
    3253          92 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3254          92 :                 if (!batch) {
    3255           0 :                         free(ctx);
    3256           0 :                         cb_fn(cb_arg, -ENOMEM);
    3257           0 :                         return;
    3258             :                 }
    3259             : 
    3260          92 :                 if (is_allocated) {
    3261          92 :                         bs_batch_unmap_dev(batch, lba, lba_count);
    3262             :                 }
    3263             : 
    3264          92 :                 bs_batch_close(batch);
    3265          92 :                 break;
    3266             :         }
    3267           0 :         case SPDK_BLOB_READV:
    3268             :         case SPDK_BLOB_WRITEV:
    3269           0 :                 SPDK_ERRLOG("readv/write not valid\n");
    3270           0 :                 cb_fn(cb_arg, -EINVAL);
    3271           0 :                 break;
    3272             :         }
    3273       37830 : }
    3274             : 
    3275             : static void
    3276       38524 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3277             :                        void *payload, uint64_t offset, uint64_t length,
    3278             :                        spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3279             : {
                               /*
                                * Validate a single-buffer blob request and hand it to the single or
                                * split submit path. Every outcome is reported through cb_fn:
                                *   -EPERM  for any op other than SPDK_BLOB_READ on a data_ro blob,
                                *   0       immediately for a zero-length request,
                                *   -EINVAL when offset + length exceeds the value bs_cluster_to_lba()
                                *           returns for blob->active.num_clusters.
                                */
    3280       38524 :         assert(blob != NULL);
    3281             : 
    3282       38524 :         if (blob->data_ro && op_type != SPDK_BLOB_READ) {
    3283           4 :                 cb_fn(cb_arg, -EPERM);
    3284           4 :                 return;
    3285             :         }
    3286             : 
    3287       38520 :         if (length == 0) {
    3288         492 :                 cb_fn(cb_arg, 0);
    3289         492 :                 return;
    3290             :         }
    3291             : 
                               /*
                                * NOTE(review): offset + length is a plain uint64_t addition; a sum
                                * that wraps would slip past this bound. Confirm callers cannot pass
                                * such values, or compare against the limit without adding.
                                */
    3292       38028 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3293          24 :                 cb_fn(cb_arg, -EINVAL);
    3294          24 :                 return;
    3295             :         }
                               /*
                                * A request no longer than the io units left before the next cluster
                                * boundary is issued as one op; anything longer goes to the split path.
                                */
    3296       38004 :         if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
    3297       37826 :                 blob_request_submit_op_single(_channel, blob, payload, offset, length,
    3298             :                                               cb_fn, cb_arg, op_type);
    3299             :         } else {
    3300         178 :                 blob_request_submit_op_split(_channel, blob, payload, offset, length,
    3301             :                                              cb_fn, cb_arg, op_type);
    3302             :         }
    3303             : }
    3304             : 
    3305             : struct rw_iov_ctx {
                               /*
                                * State for a readv/writev request that crosses a cluster boundary and
                                * is therefore issued piece by piece. Allocated in
                                * blob_request_submit_rw_iov() and freed in rw_iov_split_next().
                                */

                               /* Blob and channel each piece is issued against. */
    3306             :         struct spdk_blob *blob;
    3307             :         struct spdk_io_channel *channel;
                               /* Caller's completion callback and its argument. */
    3308             :         spdk_blob_op_complete cb_fn;
    3309             :         void *cb_arg;
                               /* true: pieces are issued as readv; false: as writev. */
    3310             :         bool read;
                               /* Entry count of orig_iov; iov[] is allocated with the same number of slots. */
    3311             :         int iovcnt;
                               /* Caller's iovec array; referenced, not copied. */
    3312             :         struct iovec *orig_iov;
                               /* Blob offset, in io units, at which the next piece starts. */
    3313             :         uint64_t io_unit_offset;
                               /* io units not yet issued. */
    3314             :         uint64_t io_units_remaining;
                               /* io units already issued; used to find the resume position in orig_iov. */
    3315             :         uint64_t io_units_done;
                               /* Passed unchanged to every spdk_blob_io_readv_ext/writev_ext call. */
    3316             :         struct spdk_blob_ext_io_opts *ext_io_opts;
                               /*
                                * Scratch iovec array rebuilt for each piece. Declared as a C99
                                * flexible array member rather than a zero-length array.
                                */
    3317             :         struct iovec iov[];
    3318             : };
    3319             : 
    3320             : static void
    3321        2360 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    3322             : {
                               /*
                                * Completion callback handed to the bs_sequence_readv/writev calls in
                                * blob_request_submit_rw_iov(), always with a NULL argument. It only
                                * finishes the sequence, forwarding bserrno.
                                */
    3323        2360 :         assert(cb_arg == NULL);
    3324        2360 :         bs_sequence_finish(seq, bserrno);
    3325        2360 : }
    3326             : 
    3327             : static void
    3328         744 : rw_iov_split_next(void *cb_arg, int bserrno)
    3329             : {
                               /*
                                * Issue the next piece of a split readv/writev request. cb_arg is the
                                * struct rw_iov_ctx for the request. This function is also the
                                * completion callback of every piece it issues, so each completion
                                * drives the following piece until nothing remains or an error occurs.
                                */
    3330         744 :         struct rw_iov_ctx *ctx = cb_arg;
    3331         744 :         struct spdk_blob *blob = ctx->blob;
    3332             :         struct iovec *iov, *orig_iov;
    3333             :         int iovcnt;
    3334             :         size_t orig_iovoff;
    3335             :         uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
    3336             :         uint64_t byte_count;
    3337             : 
                               /*
                                * Finished (or failed): report bserrno to the caller's callback and
                                * release the context.
                                */
    3338         744 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    3339         204 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
    3340         204 :                 free(ctx);
    3341         204 :                 return;
    3342             :         }
    3343             : 
                               /* This piece covers at most the io units left before the next cluster boundary. */
    3344         540 :         io_unit_offset = ctx->io_unit_offset;
    3345         540 :         io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
    3346         540 :         io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
    3347             :         /*
    3348             :          * Get index and offset into the original iov array for our current position in the I/O sequence.
    3349             :          *  byte_count will keep track of how many bytes remaining until orig_iov and orig_iovoff will
    3350             :          *  point to the current position in the I/O sequence.
    3351             :          */
    3352         540 :         byte_count = ctx->io_units_done * blob->bs->io_unit_size;
    3353         540 :         orig_iov = &ctx->orig_iov[0];
    3354         540 :         orig_iovoff = 0;
    3355        1148 :         while (byte_count > 0) {
    3356         608 :                 if (byte_count >= orig_iov->iov_len) {
                                               /* Skip an iovec that earlier pieces consumed entirely. */
    3357         352 :                         byte_count -= orig_iov->iov_len;
    3358         352 :                         orig_iov++;
    3359             :                 } else {
                                               /* The resume point lies inside this iovec. */
    3360         256 :                         orig_iovoff = byte_count;
    3361         256 :                         byte_count = 0;
    3362             :                 }
    3363             :         }
    3364             : 
    3365             :         /*
    3366             :          * Build an iov array for the next I/O in the sequence.  byte_count will keep track of how many
    3367             :          *  bytes of this next I/O remain to be accounted for in the new iov array.
    3368             :          */
    3369         540 :         byte_count = io_units_count * blob->bs->io_unit_size;
    3370         540 :         iov = &ctx->iov[0];
    3371         540 :         iovcnt = 0;
    3372        1380 :         while (byte_count > 0) {
    3373         840 :                 assert(iovcnt < ctx->iovcnt);
    3374         840 :                 iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
                                       /*
                                        * NOTE(review): iov_base is a void pointer, so this addition relies
                                        * on the GNU void-pointer arithmetic extension (byte-sized steps).
                                        */
    3375         840 :                 iov->iov_base = orig_iov->iov_base + orig_iovoff;
    3376         840 :                 byte_count -= iov->iov_len;
                                       /* Only the first iovec of a piece can start at a non-zero offset. */
    3377         840 :                 orig_iovoff = 0;
    3378         840 :                 orig_iov++;
    3379         840 :                 iov++;
    3380         840 :                 iovcnt++;
    3381             :         }
    3382             : 
                               /*
                                * Update the bookkeeping before issuing the piece; the local
                                * io_unit_offset still holds this piece's starting offset.
                                */
    3383         540 :         ctx->io_unit_offset += io_units_count;
    3384         540 :         ctx->io_units_remaining -= io_units_count;
    3385         540 :         ctx->io_units_done += io_units_count;
    3386         540 :         iov = &ctx->iov[0];
    3387             : 
    3388         540 :         if (ctx->read) {
    3389         408 :                 spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3390             :                                        io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3391             :         } else {
    3392         132 :                 spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3393             :                                         io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3394             :         }
    3395             : }
    3396             : 
    3397             : static void
    3398        2588 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3399             :                            struct iovec *iov, int iovcnt,
    3400             :                            uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
    3401             :                            struct spdk_blob_ext_io_opts *ext_io_opts)
    3402             : {
                               /*
                                * Submit a vectored read (read == true) or write against a blob.
                                * Every outcome is reported through cb_fn:
                                *   -EPERM  for a write to a data_ro blob,
                                *   0       immediately for a zero-length request,
                                *   -EINVAL when offset + length exceeds the value bs_cluster_to_lba()
                                *           returns for blob->active.num_clusters,
                                *   -ENOMEM when an op, sequence or split context cannot be allocated.
                                * ext_io_opts is attached to whatever sequence or queued op carries the
                                * request, and stored in the split context for multi-cluster requests.
                                */
    3403        2588 :         struct spdk_bs_cpl      cpl;
    3404             : 
    3405        2588 :         assert(blob != NULL);
    3406             : 
    3407        2588 :         if (!read && blob->data_ro) {
    3408           4 :                 cb_fn(cb_arg, -EPERM);
    3409           4 :                 return;
    3410             :         }
    3411             : 
    3412        2584 :         if (length == 0) {
    3413           0 :                 cb_fn(cb_arg, 0);
    3414           0 :                 return;
    3415             :         }
    3416             : 
                               /*
                                * NOTE(review): offset + length is an unchecked uint64_t addition; a
                                * wrapped sum would pass this bound. Confirm callers cannot pass such
                                * values.
                                */
    3417        2584 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3418           0 :                 cb_fn(cb_arg, -EINVAL);
    3419           0 :                 return;
    3420             :         }
    3421             : 
    3422             :         /*
    3423             :          * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
    3424             :          *  to split a request that spans a cluster boundary.  For I/O that do not span a cluster boundary,
    3425             :          *  there will be no noticeable difference compared to using a batch.  For I/O that do span a cluster
    3426             :          *  boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
    3427             :          *  to allocate a separate iov array and split the I/O such that none of the resulting
    3428             :          *  smaller I/O cross a cluster boundary.  These smaller I/O will be issued in sequence (not in parallel)
    3429             :          *  but since this case happens very infrequently, any performance impact will be negligible.
    3430             :          *
    3431             :          * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
    3432             :          *  for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
    3433             :          *  in a batch.  That would also require creating an intermediate spdk_bs_cpl that would get called
    3434             :          *  when the batch was completed, to allow for freeing the memory for the iov arrays.
    3435             :          */
    3436        2584 :         if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
    3437        2376 :                 uint64_t lba_count;
    3438        2376 :                 uint64_t lba;
    3439             :                 bool is_allocated;
    3440             : 
    3441        2376 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3442        2376 :                 cpl.u.blob_basic.cb_fn = cb_fn;
    3443        2376 :                 cpl.u.blob_basic.cb_arg = cb_arg;
    3444             : 
    3445        2376 :                 if (blob->frozen_refcnt) {
    3446             :                         /* This blob I/O is frozen */
    3447             :                         enum spdk_blob_op_type op_type;
    3448             :                         spdk_bs_user_op_t *op;
    3449           0 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
    3450             : 
    3451           0 :                         op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
    3452           0 :                         op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
    3453           0 :                         if (!op) {
    3454           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3455           0 :                                 return;
    3456             :                         }
    3457             : 
                                               /*
                                                * Keep the caller's extended I/O options on the queued op, as the
                                                * cluster-allocation path below does for the op it queues.
                                                */
                                               op->ext_io_opts = ext_io_opts;

    3458           0 :                         TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3459             : 
    3460           0 :                         return;
    3461             :                 }
    3462             : 
    3463        2376 :                 is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3464             : 
    3465        2376 :                 if (read) {
    3466             :                         spdk_bs_sequence_t *seq;
    3467             : 
    3468        2084 :                         seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3469        2084 :                         if (!seq) {
    3470           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3471           0 :                                 return;
    3472             :                         }
    3473             : 
    3474        2084 :                         seq->ext_io_opts = ext_io_opts;
    3475             : 
                                               /* Allocated ranges are read from the blobstore device, the rest from back_bs_dev. */
    3476        2084 :                         if (is_allocated) {
    3477         540 :                                 bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3478             :                         } else {
    3479        1544 :                                 bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
    3480             :                                                          rw_iov_done, NULL);
    3481             :                         }
    3482             :                 } else {
    3483         292 :                         if (is_allocated) {
    3484             :                                 spdk_bs_sequence_t *seq;
    3485             : 
    3486         276 :                                 seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3487         276 :                                 if (!seq) {
    3488           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3489           0 :                                         return;
    3490             :                                 }
    3491             : 
    3492         276 :                                 seq->ext_io_opts = ext_io_opts;
    3493             : 
    3494         276 :                                 bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3495             :                         } else {
    3496             :                                 /* Queue this operation and allocate the cluster */
    3497             :                                 spdk_bs_user_op_t *op;
    3498             : 
    3499          16 :                                 op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
    3500             :                                                       length);
    3501          16 :                                 if (!op) {
    3502           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3503           0 :                                         return;
    3504             :                                 }
    3505             : 
    3506          16 :                                 op->ext_io_opts = ext_io_opts;
    3507             : 
    3508          16 :                                 bs_allocate_and_copy_cluster(blob, _channel, offset, op);
    3509             :                         }
    3510             :                 }
    3511             :         } else {
    3512             :                 struct rw_iov_ctx *ctx;
    3513             : 
                                       /* One allocation holds the context plus iovcnt scratch iovec slots. */
    3514         208 :                 ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
    3515         208 :                 if (ctx == NULL) {
    3516           4 :                         cb_fn(cb_arg, -ENOMEM);
    3517           4 :                         return;
    3518             :                 }
    3519             : 
    3520         204 :                 ctx->blob = blob;
    3521         204 :                 ctx->channel = _channel;
    3522         204 :                 ctx->cb_fn = cb_fn;
    3523         204 :                 ctx->cb_arg = cb_arg;
    3524         204 :                 ctx->read = read;
    3525         204 :                 ctx->orig_iov = iov;
    3526         204 :                 ctx->iovcnt = iovcnt;
    3527         204 :                 ctx->io_unit_offset = offset;
    3528         204 :                 ctx->io_units_remaining = length;
    3529         204 :                 ctx->io_units_done = 0;
    3530         204 :                 ctx->ext_io_opts = ext_io_opts;
    3531             : 
                                       /* Start the chain; rw_iov_split_next() frees ctx when it finishes. */
    3532         204 :                 rw_iov_split_next(ctx, 0);
    3533             :         }
    3534             : }
    3535             : 
    3536             : static struct spdk_blob *
    3537        7709 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
    3538             : {
                               /*
                                * Return the blob stored under blobid in bs->open_blobs. Returns NULL
                                * without searching the tree when the id's bit is clear in
                                * bs->open_blobids.
                                */
    3539        7709 :         struct spdk_blob find;
    3540             : 
    3541        7709 :         if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
    3542        6932 :                 return NULL;
    3543             :         }
    3544             : 
                               /*
                                * Only the id field of the stack key is initialized; presumably the
                                * tree comparator reads nothing else - verify against its definition.
                                */
    3545         777 :         find.id = blobid;
    3546         777 :         return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
    3547             : }
    3548             : 
    3549             : static void
    3550        1798 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
    3551             :                                     struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
    3552             : {
                               /*
                                * Look up the list entries describing blob's parent relationship.
                                * On return *snapshot_entry is the entry in blob->bs->snapshots whose id
                                * equals blob->parent_id, and *clone_entry is the entry for blob->id in
                                * that snapshot's clones list. Both are NULL when the blob has no
                                * parent or no matching snapshot entry exists.
                                */
    3553        1798 :         assert(blob != NULL);
    3554        1798 :         *snapshot_entry = NULL;
    3555        1798 :         *clone_entry = NULL;
    3556             : 
    3557        1798 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    3558        1518 :                 return;
    3559             :         }
    3560             : 
                               /* The loop variable is left NULL when the list is exhausted without a match. */
    3561         424 :         TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
    3562         372 :                 if ((*snapshot_entry)->id == blob->parent_id) {
    3563         228 :                         break;
    3564             :                 }
    3565             :         }
    3566             : 
    3567         280 :         if (*snapshot_entry != NULL) {
    3568         272 :                 TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
    3569         272 :                         if ((*clone_entry)->id == blob->id) {
    3570         228 :                                 break;
    3571             :                         }
    3572             :                 }
    3573             : 
                                       /* A snapshot entry that lacks this blob among its clones is treated as a bug. */
    3574         228 :                 assert(*clone_entry != NULL);
    3575             :         }
    3576             : }
    3577             : 
    3578             : static int
    3579         796 : bs_channel_create(void *io_device, void *ctx_buf)
    3580             : {
                               /*
                                * Initialize the spdk_bs_channel in ctx_buf for the blobstore passed as
                                * io_device. Returns 0 on success, or -1 when the request pool, the
                                * device channel or the new-cluster page cannot be obtained; resources
                                * acquired before the failure are released first.
                                */
    3581         796 :         struct spdk_blob_store          *bs = io_device;
    3582         796 :         struct spdk_bs_channel          *channel = ctx_buf;
    3583             :         struct spdk_bs_dev              *dev;
    3584         796 :         uint32_t                        max_ops = bs->max_channel_ops;
    3585             :         uint32_t                        i;
    3586             : 
    3587         796 :         dev = bs->dev;
    3588             : 
                               /* One zeroed allocation backs all max_ops request sets of this channel. */
    3589         796 :         channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
    3590         796 :         if (!channel->req_mem) {
    3591           0 :                 return -1;
    3592             :         }
    3593             : 
    3594         796 :         TAILQ_INIT(&channel->reqs);
    3595             : 
                               /* Put every request set on the channel's reqs list. */
    3596      408348 :         for (i = 0; i < max_ops; i++) {
    3597      407552 :                 TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
    3598             :         }
    3599             : 
    3600         796 :         channel->bs = bs;
    3601         796 :         channel->dev = dev;
    3602         796 :         channel->dev_channel = dev->create_channel(dev);
    3603             : 
    3604         796 :         if (!channel->dev_channel) {
    3605           0 :                 SPDK_ERRLOG("Failed to create device channel.\n");
    3606           0 :                 free(channel->req_mem);
    3607           0 :                 return -1;
    3608             :         }
    3609             : 
                               /* Zeroed SPDK_BS_PAGE_SIZE buffer requested with SPDK_MALLOC_DMA. */
    3610         796 :         channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
    3611             :                                     SPDK_MALLOC_DMA);
    3612         796 :         if (!channel->new_cluster_page) {
    3613           0 :                 SPDK_ERRLOG("Failed to allocate new cluster page\n");
    3614           0 :                 free(channel->req_mem);
    3615           0 :                 channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3616           0 :                 return -1;
    3617             :         }
    3618             : 
    3619         796 :         TAILQ_INIT(&channel->need_cluster_alloc);
    3620         796 :         TAILQ_INIT(&channel->queued_io);
    3621         796 :         RB_INIT(&channel->esnap_channels);
    3622             : 
    3623         796 :         return 0;
    3624             : }
    3625             : 
    3626             : static void
    3627         796 : bs_channel_destroy(void *io_device, void *ctx_buf)
    3628             : {
                               /*
                                * Tear down the spdk_bs_channel in ctx_buf: abort every op still on the
                                * need_cluster_alloc and queued_io lists with -EIO, destroy the esnap
                                * channels, then release the request pool, the new-cluster page and the
                                * device channel. io_device is not used.
                                */
    3629         796 :         struct spdk_bs_channel *channel = ctx_buf;
    3630             :         spdk_bs_user_op_t *op;
    3631             : 
    3632         796 :         while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
    3633           0 :                 op = TAILQ_FIRST(&channel->need_cluster_alloc);
    3634           0 :                 TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
    3635           0 :                 bs_user_op_abort(op, -EIO);
    3636             :         }
    3637             : 
    3638         796 :         while (!TAILQ_EMPTY(&channel->queued_io)) {
    3639           0 :                 op = TAILQ_FIRST(&channel->queued_io);
    3640           0 :                 TAILQ_REMOVE(&channel->queued_io, op, link);
    3641           0 :                 bs_user_op_abort(op, -EIO);
    3642             :         }
    3643             : 
    3644         796 :         blob_esnap_destroy_bs_channel(channel);
    3645             : 
    3646         796 :         free(channel->req_mem);
    3647         796 :         spdk_free(channel->new_cluster_page);
    3648         796 :         channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3649         796 : }
    3650             : 
    3651             : static void
    3652         780 : bs_dev_destroy(void *io_device)
    3653             : {
                               /*
                                * Final teardown of the blobstore passed as io_device: destroy the
                                * underlying device, free any blobs still in open_blobs, release the
                                * lock, bit arrays and cluster pool, invoke the unload completion, and
                                * free the blobstore structure itself.
                                */
    3654         780 :         struct spdk_blob_store *bs = io_device;
    3655             :         struct spdk_blob        *blob, *blob_tmp;
    3656             : 
    3657         780 :         bs->dev->destroy(bs->dev);
    3658             : 
                               /* The _SAFE iterator is required because each blob is removed and freed inside the loop. */
    3659         780 :         RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
    3660           0 :                 RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
    3661           0 :                 spdk_bit_array_clear(bs->open_blobids, blob->id);
    3662           0 :                 blob_free(blob);
    3663             :         }
    3664             : 
    3665         780 :         spdk_spin_destroy(&bs->used_lock);
    3666             : 
    3667         780 :         spdk_bit_array_free(&bs->open_blobids);
    3668         780 :         spdk_bit_array_free(&bs->used_blobids);
    3669         780 :         spdk_bit_array_free(&bs->used_md_pages);
    3670         780 :         spdk_bit_pool_free(&bs->used_clusters);
    3671             :         /*
    3672             :          * If this function is called for any reason except a successful unload,
    3673             :          * the unload_cpl type will be NONE and this will be a nop.
    3674             :          */
    3675         780 :         bs_call_cpl(&bs->unload_cpl, bs->unload_err);
    3676             : 
    3677         780 :         free(bs);
    3678         780 : }
    3679             : 
    3680             : static int
    3681         900 : bs_blob_list_add(struct spdk_blob *blob)
    3682             : {
                               /*
                                * Record blob as a clone of its parent in blob->bs->snapshots.
                                * Returns 0 on success, including when there is nothing to record
                                * (parent_id is SPDK_BLOBID_INVALID or SPDK_BLOBID_EXTERNAL_SNAPSHOT),
                                * and -ENOMEM when a list entry cannot be allocated.
                                */
    3683             :         spdk_blob_id snapshot_id;
    3684         900 :         struct spdk_blob_list *snapshot_entry = NULL;
    3685         900 :         struct spdk_blob_list *clone_entry = NULL;
    3686             : 
    3687         900 :         assert(blob != NULL);
    3688             : 
    3689         900 :         snapshot_id = blob->parent_id;
    3690         900 :         if (snapshot_id == SPDK_BLOBID_INVALID ||
    3691             :             snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    3692         488 :                 return 0;
    3693             :         }
    3694             : 
    3695         412 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
    3696         412 :         if (snapshot_entry == NULL) {
    3697             :                 /* Snapshot not found */
    3698         284 :                 snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
    3699         284 :                 if (snapshot_entry == NULL) {
    3700           0 :                         return -ENOMEM;
    3701             :                 }
    3702         284 :                 snapshot_entry->id = snapshot_id;
    3703         284 :                 TAILQ_INIT(&snapshot_entry->clones);
    3704         284 :                 TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
    3705             :         } else {
                                       /*
                                        * Existing snapshot entry: check whether this blob is already listed.
                                        * clone_entry ends up NULL when it is not.
                                        */
    3706         204 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    3707          76 :                         if (clone_entry->id == blob->id) {
    3708           0 :                                 break;
    3709             :                         }
    3710             :                 }
    3711             :         }
    3712             : 
    3713         412 :         if (clone_entry == NULL) {
    3714             :                 /* Clone not found */
    3715         412 :                 clone_entry = calloc(1, sizeof(struct spdk_blob_list));
    3716         412 :                 if (clone_entry == NULL) {
                                               /*
                                                * NOTE(review): a snapshot entry created above stays on the
                                                * snapshots list with no clones on this path - confirm that
                                                * is acceptable to callers.
                                                */
    3717           0 :                         return -ENOMEM;
    3718             :                 }
    3719         412 :                 clone_entry->id = blob->id;
    3720         412 :                 TAILQ_INIT(&clone_entry->clones);
    3721         412 :                 TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
    3722         412 :                 snapshot_entry->clone_count++;
    3723             :         }
    3724             : 
    3725         412 :         return 0;
    3726             : }
    3727             : /*
                     :  * Detach blob from its parent's clone list.
                     :  *
                     :  * blob_get_snapshot_and_clone_entries() supplies both list entries. When it
                     :  * yields no snapshot entry nothing is changed. Otherwise blob->parent_id is
                     :  * reset to SPDK_BLOBID_INVALID, the clone entry is unlinked and freed, and the
                     :  * parent's clone_count is decremented. The snapshot entry itself is left in
                     :  * the list.
                     :  *
                     :  * NOTE(review): clone_entry is used without a NULL check; presumably the
                     :  * helper always sets it together with snapshot_entry - confirm.
                     :  */
    3728             : static void
    3729        1720 : bs_blob_list_remove(struct spdk_blob *blob)
    3730             : {
    3731        1720 :         struct spdk_blob_list *snapshot_entry = NULL;
    3732        1720 :         struct spdk_blob_list *clone_entry = NULL;
    3733             : 
    3734        1720 :         blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
    3735             : 
    3736        1720 :         if (snapshot_entry == NULL) {
    3737        1508 :                 return;
    3738             :         }
    3739             : 
    3740         212 :         blob->parent_id = SPDK_BLOBID_INVALID;
    3741         212 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3742         212 :         free(clone_entry);
    3743             : 
    3744         212 :         snapshot_entry->clone_count--;
    3745             : }
    3746             : /*
                     :  * Free every entry of bs->snapshots together with the clone entries hanging
                     :  * off each one.
                     :  *
                     :  * Both loops use TAILQ_FOREACH_SAFE because the current entry is unlinked and
                     :  * freed inside the loop body. Always returns 0.
                     :  */
    3747             : static int
    3748         780 : bs_blob_list_free(struct spdk_blob_store *bs)
    3749             : {
    3750             :         struct spdk_blob_list *snapshot_entry;
    3751             :         struct spdk_blob_list *snapshot_entry_tmp;
    3752             :         struct spdk_blob_list *clone_entry;
    3753             :         struct spdk_blob_list *clone_entry_tmp;
    3754             : 
    3755         924 :         TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
    3756         296 :                 TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
    3757         152 :                         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3758         152 :                         free(clone_entry);
    3759             :                 }
    3760         144 :                 TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
    3761         144 :                 free(snapshot_entry);
    3762             :         }
    3763             : 
    3764         780 :         return 0;
    3765             : }
    3766             : /*
                     :  * Begin tearing down a blobstore: free the snapshot/clone list, unregister
                     :  * the metadata thread, and unregister the io_device with bs_dev_destroy as
                     :  * the unregister callback.
                     :  *
                     :  * bs itself is not freed in this function. NOTE(review): presumably
                     :  * bs_dev_destroy releases it once the unregister completes - confirm.
                     :  * The return value of bs_unregister_md_thread() is ignored.
                     :  */
    3767             : static void
    3768         780 : bs_free(struct spdk_blob_store *bs)
    3769             : {
    3770         780 :         bs_blob_list_free(bs);
    3771             : 
    3772         780 :         bs_unregister_md_thread(bs);
    3773         780 :         spdk_io_device_unregister(bs, bs_dev_destroy);
    3774         780 : }
    3775             : /*
                     :  * Fill opts with the default blobstore options.
                     :  *
                     :  * opts_size is the number of bytes of *opts the caller provides. That many
                     :  * bytes are zeroed first, then each default is stored only if the field lies
                     :  * entirely within the first opts_size bytes (see FIELD_OK), so nothing past
                     :  * the caller's buffer is written by the SET_FIELD assignments.
                     :  *
                     :  * If opts is NULL or opts_size is 0 an error is logged and opts is left
                     :  * untouched.
                     :  *
                     :  * NOTE(review): opts->opts_size is stored unconditionally, which assumes
                     :  * opts_size is at least large enough to contain that member.
                     :  */
    3776             : void
    3777        1048 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
    3778             : {
    3779             : 
    3780        1048 :         if (!opts) {
    3781           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
    3782           0 :                 return;
    3783             :         }
    3784             : 
    3785        1048 :         if (!opts_size) {
    3786           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    3787           0 :                 return;
    3788             :         }
    3789             : 
    3790        1048 :         memset(opts, 0, opts_size);
    3791        1048 :         opts->opts_size = opts_size;
    3792             : /* FIELD_OK: true when the named field ends at or before byte opts_size of the struct. */
    3793             : #define FIELD_OK(field) \
    3794             :         offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
    3795             : /* SET_FIELD: assign value to the field only when FIELD_OK holds for it. */
    3796             : #define SET_FIELD(field, value) \
    3797             :         if (FIELD_OK(field)) { \
    3798             :                 opts->field = value; \
    3799             :         } \
    3800             : 
    3801        1048 :         SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
    3802        1048 :         SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3803        1048 :         SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES); /* NOTE(review): reuses the num_md_pages default - confirm this is intended */
    3804        1048 :         SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
    3805        1048 :         SET_FIELD(clear_method,  BS_CLEAR_WITH_UNMAP);
    3806             :         /* bstype is cleared with memset rather than assigned through SET_FIELD */
    3807        1048 :         if (FIELD_OK(bstype)) {
    3808        1048 :                 memset(&opts->bstype, 0, sizeof(opts->bstype));
    3809             :         }
    3810             : 
    3811        1048 :         SET_FIELD(iter_cb_fn, NULL);
    3812        1048 :         SET_FIELD(iter_cb_arg, NULL);
    3813        1048 :         SET_FIELD(force_recover, false);
    3814        1048 :         SET_FIELD(esnap_bs_dev_create, NULL);
    3815        1048 :         SET_FIELD(esnap_ctx, NULL);
    3816             : 
    3817             : #undef FIELD_OK
    3818             : #undef SET_FIELD
    3819             : }
    3820             : /*
                     :  * Check that none of cluster_sz, num_md_pages, max_md_ops and
                     :  * max_channel_ops is zero.
                     :  *
                     :  * Returns 0 when all four are non-zero; otherwise logs an error and returns
                     :  * -1 (a plain -1, not a negated errno value).
                     :  */
    3821             : static int
    3822         484 : bs_opts_verify(struct spdk_bs_opts *opts)
    3823             : {
    3824         484 :         if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
    3825         480 :             opts->max_channel_ops == 0) {
    3826           4 :                 SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
    3827           4 :                 return -1;
    3828             :         }
    3829             : 
    3830         480 :         return 0;
    3831             : }
    3832             : 
    3833             : /* START spdk_bs_load */
    3834             : 
    3835             : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
    3836             : 
    3837             : struct spdk_bs_load_ctx {
    3838             :         struct spdk_blob_store          *bs; /* blobstore this context operates on (set in bs_alloc) */
    3839             :         struct spdk_bs_super_block      *super; /* DMA buffer holding the super block (allocated in bs_alloc) */
    3840             : 
    3841             :         struct spdk_bs_md_mask          *mask; /* DMA buffer for the mask being written by bs_write_used_*() */
    3842             :         bool                            in_page_chain;
    3843             :         uint32_t                        page_index;
    3844             :         uint32_t                        cur_page;
    3845             :         struct spdk_blob_md_page        *page;
    3846             : 
    3847             :         uint64_t                        num_extent_pages;
    3848             :         uint32_t                        *extent_page_num;
    3849             :         struct spdk_blob_md_page        *extent_pages;
    3850             :         struct spdk_bit_array           *used_clusters; /* cluster bit array created in bs_alloc */
    3851             : 
    3852             :         spdk_bs_sequence_t                      *seq; /* sequence completed through bs_sequence_finish() */
    3853             :         spdk_blob_op_with_handle_complete       iter_cb_fn; /* copied from spdk_bs_opts in bs_alloc */
    3854             :         void                                    *iter_cb_arg; /* copied from spdk_bs_opts in bs_alloc */
    3855             :         struct spdk_blob                        *blob; /* blob being examined by the corrupted-blob callbacks */
    3856             :         spdk_blob_id                            blobid; /* id saved before the corrupted blob is closed and deleted */
    3857             : 
    3858             :         bool                                    force_recover; /* copied from spdk_bs_opts in bs_alloc */
    3859             : 
    3860             :         /* These fields are used in the spdk_bs_dump path. */
    3861             :         bool                                    dumping;
    3862             :         FILE                                    *fp;
    3863             :         spdk_bs_dump_print_xattr                print_xattr_fn;
    3864             :         char                                    xattr_name[4096];
    3865             : };
    3866             : /*
                     :  * Allocate a blobstore object and its load context for dev, initialized from
                     :  * opts, and register the blobstore as an io_device.
                     :  *
                     :  * On success returns 0 and stores the new objects in *_bs and *_ctx.
                     :  * On failure nothing is stored in *_bs / *_ctx, everything allocated here is
                     :  * released, and one of the following is returned:
                     :  *   -ENOSPC  the device (blocklen * blockcnt) is smaller than opts->cluster_sz
                     :  *   -EINVAL  opts->cluster_sz is smaller than SPDK_BS_PAGE_SIZE
                     :  *   -ENOMEM  an allocation failed, or bs_register_md_thread() returned -1
                     :  *
                     :  * The device-size check runs before the cluster-size check, so a tiny
                     :  * device reports -ENOSPC even when cluster_sz is also invalid.
                     :  *
                     :  * NOTE(review): total_clusters divides by (cluster_sz / blocklen), which is
                     :  * zero if the device block length exceeds the cluster size - confirm callers
                     :  * rule that out.
                     :  */
    3867             : static int
    3868         784 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
    3869             :          struct spdk_bs_load_ctx **_ctx)
    3870             : {
    3871             :         struct spdk_blob_store  *bs;
    3872             :         struct spdk_bs_load_ctx *ctx;
    3873             :         uint64_t dev_size;
    3874             :         int rc;
    3875             : 
    3876         784 :         dev_size = dev->blocklen * dev->blockcnt;
    3877         784 :         if (dev_size < opts->cluster_sz) {
    3878             :                 /* Device size cannot be smaller than cluster size of blobstore */
    3879           0 :                 SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
    3880             :                              dev_size, opts->cluster_sz);
    3881           0 :                 return -ENOSPC;
    3882             :         }
    3883         784 :         if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) {
    3884             :                 /* Cluster size cannot be smaller than page size */
    3885           4 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
    3886             :                             opts->cluster_sz, SPDK_BS_PAGE_SIZE);
    3887           4 :                 return -EINVAL;
    3888             :         }
    3889         780 :         bs = calloc(1, sizeof(struct spdk_blob_store));
    3890         780 :         if (!bs) {
    3891           0 :                 return -ENOMEM;
    3892             :         }
    3893             : 
    3894         780 :         ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
    3895         780 :         if (!ctx) {
    3896           0 :                 free(bs);
    3897           0 :                 return -ENOMEM;
    3898             :         }
    3899             : 
    3900         780 :         ctx->bs = bs;
    3901         780 :         ctx->iter_cb_fn = opts->iter_cb_fn;
    3902         780 :         ctx->iter_cb_arg = opts->iter_cb_arg;
    3903         780 :         ctx->force_recover = opts->force_recover;
    3904             :         /* The super block buffer is zeroed DMA memory requested with 0x1000 alignment */
    3905         780 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    3906             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    3907         780 :         if (!ctx->super) {
    3908           0 :                 free(ctx);
    3909           0 :                 free(bs);
    3910           0 :                 return -ENOMEM;
    3911             :         }
    3912             : 
    3913         780 :         RB_INIT(&bs->open_blobs);
    3914         780 :         TAILQ_INIT(&bs->snapshots);
    3915         780 :         bs->dev = dev;
    3916         780 :         bs->md_thread = spdk_get_thread();
    3917         780 :         assert(bs->md_thread != NULL);
    3918             : 
    3919             :         /*
    3920             :          * Do not use bs_lba_to_cluster() here since blockcnt may not be an
    3921             :          *  even multiple of the cluster size.
    3922             :          */
    3923         780 :         bs->cluster_sz = opts->cluster_sz;
    3924         780 :         bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
    3925         780 :         ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
    3926         780 :         if (!ctx->used_clusters) {
    3927           0 :                 spdk_free(ctx->super);
    3928           0 :                 free(ctx);
    3929           0 :                 free(bs);
    3930           0 :                 return -ENOMEM;
    3931             :         }
    3932             :         /* pages_per_cluster_shift keeps its calloc value of 0 unless the page count is a power of two */
    3933         780 :         bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    3934         780 :         if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
    3935         780 :                 bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
    3936             :         }
    3937         780 :         bs->num_free_clusters = bs->total_clusters;
    3938         780 :         bs->io_unit_size = dev->blocklen;
    3939             : 
    3940         780 :         bs->max_channel_ops = opts->max_channel_ops;
    3941         780 :         bs->super_blob = SPDK_BLOBID_INVALID;
    3942         780 :         memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
    3943         780 :         bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
    3944         780 :         bs->esnap_ctx = opts->esnap_ctx;
    3945             : 
    3946             :         /* The metadata is assumed to be at least 1 page */
    3947         780 :         bs->used_md_pages = spdk_bit_array_create(1);
    3948         780 :         bs->used_blobids = spdk_bit_array_create(0);
    3949         780 :         bs->open_blobids = spdk_bit_array_create(0);
    3950             :         /* NOTE(review): the three bit array allocations above are not checked for failure */
    3951         780 :         spdk_spin_init(&bs->used_lock);
    3952             : 
    3953         780 :         spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
    3954             :                                 sizeof(struct spdk_bs_channel), "blobstore");
    3955         780 :         rc = bs_register_md_thread(bs);
    3956         780 :         if (rc == -1) { /* only the value -1 is treated as failure; undo everything set up above */
    3957           0 :                 spdk_io_device_unregister(bs, NULL);
    3958           0 :                 spdk_spin_destroy(&bs->used_lock);
    3959           0 :                 spdk_bit_array_free(&bs->open_blobids);
    3960           0 :                 spdk_bit_array_free(&bs->used_blobids);
    3961           0 :                 spdk_bit_array_free(&bs->used_md_pages);
    3962           0 :                 spdk_bit_array_free(&ctx->used_clusters);
    3963           0 :                 spdk_free(ctx->super);
    3964           0 :                 free(ctx);
    3965           0 :                 free(bs);
    3966             :                 /* FIXME: this is a lie but don't know how to get a proper error code here */
    3967           0 :                 return -ENOMEM;
    3968             :         }
    3969             : 
    3970         780 :         *_ctx = ctx;
    3971         780 :         *_bs = bs;
    3972         780 :         return 0;
    3973             : }
    3974             : /*
                     :  * Abort the operation tracked by ctx with error bserrno (must be non-zero).
                     :  *
                     :  * Frees the super block buffer, completes ctx->seq with bserrno, tears the
                     :  * blobstore down through bs_free(), then frees the used_clusters bit array
                     :  * and ctx itself. ctx must not be used after this returns.
                     :  *
                     :  * NOTE(review): ctx->mask is not released here; presumably callers reach this
                     :  * point with no mask outstanding - confirm.
                     :  */
    3975             : static void
    3976          24 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
    3977             : {
    3978          24 :         assert(bserrno != 0);
    3979             : 
    3980          24 :         spdk_free(ctx->super);
    3981          24 :         bs_sequence_finish(ctx->seq, bserrno);
    3982          24 :         bs_free(ctx->bs);
    3983          24 :         spdk_bit_array_free(&ctx->used_clusters);
    3984          24 :         free(ctx);
    3985          24 : }
    3986             : /*
                     :  * Refresh the super block from bs and queue a write of it on seq.
                     :  *
                     :  * super_blob and bstype are copied from bs into super, the CRC is recomputed
                     :  * with blob_md_page_calc_crc(), and sizeof(*super) bytes are written at the
                     :  * LBA of page 0. cb_fn and cb_arg are passed to bs_sequence_write_dev() as
                     :  * the completion for that write.
                     :  */
    3987             : static void
    3988         824 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    3989             :                struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    3990             : {
    3991             :         /* Update the values in the super block */
    3992         824 :         super->super_blob = bs->super_blob;
    3993         824 :         memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
    3994         824 :         super->crc = blob_md_page_calc_crc(super);
    3995         824 :         bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
    3996         824 :                               bs_byte_to_lba(bs, sizeof(*super)),
    3997             :                               cb_fn, cb_arg);
    3998         824 : }
    3999             : /*
                     :  * Build the used-clusters mask and queue a write of it on seq.
                     :  *
                     :  * arg is the struct spdk_bs_load_ctx for the operation; it is also passed on
                     :  * as the argument for cb_fn. A zeroed DMA buffer of used_cluster_mask_len
                     :  * pages is stored in ctx->mask, filled from the blobstore's bit pool or, if
                     :  * that does not exist yet, from ctx->used_clusters, and written to the region
                     :  * starting at super->used_cluster_mask_start.
                     :  *
                     :  * If the buffer cannot be allocated the operation is failed through
                     :  * bs_load_ctx_fail(ctx, -ENOMEM) and cb_fn is not invoked.
                     :  *
                     :  * NOTE(review): any previous ctx->mask pointer is overwritten without being
                     :  * freed; presumably callers release it beforehand - confirm.
                     :  */
    4000             : static void
    4001         760 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4002             : {
    4003         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4004             :         uint64_t        mask_size, lba, lba_count;
    4005             : 
    4006             :         /* Write out the used clusters mask */
    4007         760 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4008         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4009             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4010         760 :         if (!ctx->mask) {
    4011           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4012           0 :                 return;
    4013             :         }
    4014             : 
    4015         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
    4016         760 :         ctx->mask->length = ctx->bs->total_clusters;
    4017             :         /* We could get here through the normal unload path, or through dirty
    4018             :          * shutdown recovery.  For the normal unload path, we use the mask from
    4019             :          * the bit pool.  For dirty shutdown recovery, we don't have a bit pool yet -
    4020             :          * only the bit array from the load ctx.
    4021             :          */
    4022         760 :         if (ctx->bs->used_clusters) {
    4023         654 :                 assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
    4024         654 :                 spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
    4025             :         } else {
    4026         106 :                 assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
    4027         106 :                 spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
    4028             :         }
    4029         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4030         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4031         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4032             : }
    4033             : /*
                     :  * Build the used-metadata-pages mask and queue a write of it on seq.
                     :  *
                     :  * arg is the struct spdk_bs_load_ctx for the operation; it is also passed on
                     :  * as the argument for cb_fn. A zeroed DMA buffer of used_page_mask_len pages
                     :  * is stored in ctx->mask, filled from bs->used_md_pages (whose capacity is
                     :  * asserted to equal super->md_len), and written to the region starting at
                     :  * super->used_page_mask_start.
                     :  *
                     :  * If the buffer cannot be allocated the operation is failed through
                     :  * bs_load_ctx_fail(ctx, -ENOMEM) and cb_fn is not invoked.
                     :  *
                     :  * NOTE(review): any previous ctx->mask pointer is overwritten without being
                     :  * freed; presumably callers release it beforehand - confirm.
                     :  */
    4034             : static void
    4035         760 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4036             : {
    4037         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4038             :         uint64_t        mask_size, lba, lba_count;
    4039             : 
    4040         760 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4041         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4042             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4043         760 :         if (!ctx->mask) {
    4044           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4045           0 :                 return;
    4046             :         }
    4047             : 
    4048         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
    4049         760 :         ctx->mask->length = ctx->super->md_len;
    4050         760 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
    4051             : 
    4052         760 :         spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4053         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4054         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4055         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4056             : }
    4057             : /*
                     :  * Build the used-blobids mask and queue a write of it on seq.
                     :  *
                     :  * arg is the struct spdk_bs_load_ctx for the operation; it is also passed on
                     :  * as the argument for cb_fn. When super->used_blobid_mask_len is 0 nothing is
                     :  * written and cb_fn(seq, arg, 0) is called directly from this function.
                     :  * Otherwise a zeroed DMA buffer of that many pages is stored in ctx->mask,
                     :  * filled from bs->used_blobids (capacity asserted to equal super->md_len),
                     :  * and written to the region starting at super->used_blobid_mask_start.
                     :  *
                     :  * If the buffer cannot be allocated the operation is failed through
                     :  * bs_load_ctx_fail(ctx, -ENOMEM) and cb_fn is not invoked.
                     :  *
                     :  * NOTE(review): any previous ctx->mask pointer is overwritten without being
                     :  * freed; presumably callers release it beforehand - confirm.
                     :  */
    4058             : static void
    4059         760 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4060             : {
    4061         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4062             :         uint64_t        mask_size, lba, lba_count;
    4063             : 
    4064         760 :         if (ctx->super->used_blobid_mask_len == 0) {
    4065             :                 /*
    4066             :                  * This is a pre-v3 on-disk format where the blobid mask does not get
    4067             :                  *  written to disk.
    4068             :                  */
    4069          24 :                 cb_fn(seq, arg, 0);
    4070          24 :                 return;
    4071             :         }
    4072             : 
    4073         736 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4074         736 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4075             :                                  SPDK_MALLOC_DMA);
    4076         736 :         if (!ctx->mask) {
    4077           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4078           0 :                 return;
    4079             :         }
    4080             : 
    4081         736 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
    4082         736 :         ctx->mask->length = ctx->super->md_len;
    4083         736 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
    4084             : 
    4085         736 :         spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4086         736 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4087         736 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4088         736 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4089             : }
    4090             : /*
                     :  * Flag blob as thin provisioned: after blob_verify_md_op(), set
                     :  * SPDK_BLOB_THIN_PROV in blob->invalid_flags and mark the blob
                     :  * SPDK_BLOB_STATE_DIRTY. Nothing is written to disk by this function.
                     :  */
    4091             : static void
    4092         696 : blob_set_thin_provision(struct spdk_blob *blob)
    4093             : {
    4094         696 :         blob_verify_md_op(blob);
    4095         696 :         blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
    4096         696 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4097         696 : }
    4098             : /*
                     :  * Record clear_method on blob: after blob_verify_md_op(), store it in
                     :  * blob->clear_method, OR its value shifted by SPDK_BLOB_CLEAR_METHOD_SHIFT
                     :  * into blob->md_ro_flags, and mark the blob SPDK_BLOB_STATE_DIRTY.
                     :  *
                     :  * NOTE(review): the flag bits are ORed in without clearing bits left by an
                     :  * earlier call; presumably this is only called once per blob - confirm.
                     :  */
    4099             : static void
    4100        2086 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
    4101             : {
    4102        2086 :         blob_verify_md_op(blob);
    4103        2086 :         blob->clear_method = clear_method;
    4104        2086 :         blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
    4105        2086 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4106        2086 : }
    4107             : /* Declared ahead of its definition because the corrupted-blob callbacks below pass it as a callback. */
    4108             : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
    4109             : /*
                     :  * Completion callback passed to spdk_bs_delete_blob() for a corrupted blob.
                     :  *
                     :  * cb_arg is the struct spdk_bs_load_ctx. Iteration resumes at the first blob
                     :  * id set in bs->used_blobids after the page of ctx->blobid: that blob is
                     :  * opened with bs_load_iter as the callback, or, when no further bit is set,
                     :  * bs_load_iter() is called with -ENOENT to end the iteration.
                     :  *
                     :  * bserrno (the result of the delete) is not examined.
                     :  */
    4110             : static void
    4111          24 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
    4112             : {
    4113          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4114             :         spdk_blob_id id;
    4115             :         int64_t page_num;
    4116             : 
    4117             :         /* Iterate to next blob (we can't use spdk_bs_iter_next function as our
    4118             :          * last blob has been removed */
    4119          24 :         page_num = bs_blobid_to_page(ctx->blobid);
    4120          24 :         page_num++;
    4121          24 :         page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
    4122          24 :         if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
    4123          24 :                 bs_load_iter(ctx, NULL, -ENOENT);
    4124          24 :                 return;
    4125             :         }
    4126             : 
    4127           0 :         id = bs_page_to_blobid(page_num);
    4128             : 
    4129           0 :         spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
    4130             : }
    4131             : /*
                     :  * Close callback for the corrupted blob (see bs_delete_corrupted_blob).
                     :  *
                     :  * cb_arg is the struct spdk_bs_load_ctx. On success the blob is deleted by
                     :  * ctx->blobid and bs_delete_corrupted_blob_cpl continues from there. If the
                     :  * close failed, the error is logged and iteration moves on with
                     :  * spdk_bs_iter_next() without deleting anything.
                     :  */
    4132             : static void
    4133          24 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
    4134             : {
    4135          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4136             : 
    4137          24 :         if (bserrno != 0) {
    4138           0 :                 SPDK_ERRLOG("Failed to close corrupted blob\n");
    4139           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4140           0 :                 return;
    4141             :         }
    4142             : 
    4143          24 :         spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
    4144             : }
    4145             : /*
                     :  * Close callback for the clone opened in bs_examine_clone(), used when the
                     :  * corrupted snapshot (ctx->blob) is to be removed.
                     :  *
                     :  * cb_arg is the struct spdk_bs_load_ctx. If closing the clone failed, the
                     :  * error is logged and iteration moves on. Otherwise every cluster and extent
                     :  * page slot of ctx->blob is zeroed, its allocated-cluster count is reset,
                     :  * md_ro is cleared, the blob is marked thin provisioned, its id is saved in
                     :  * ctx->blobid, and the blob is closed with bs_delete_corrupted_close_cb as
                     :  * the next step.
                     :  */
    4146             : static void
    4147          24 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
    4148             : {
    4149          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4150             :         uint64_t i;
    4151             : 
    4152          24 :         if (bserrno != 0) {
    4153           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4154           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4155           0 :                 return;
    4156             :         }
    4157             : 
    4158             :         /* Snapshot and clone have the same copy of cluster map and extent pages
    4159             :          * at this point. Let's clear both for snapshot now,
    4160             :          * so that it won't be cleared for clone later when we remove snapshot.
    4161             :          * Also set thin provision to pass data corruption check */
    4162         264 :         for (i = 0; i < ctx->blob->active.num_clusters; i++) {
    4163         240 :                 ctx->blob->active.clusters[i] = 0;
    4164             :         }
    4165          36 :         for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
    4166          12 :                 ctx->blob->active.extent_pages[i] = 0;
    4167             :         }
    4168             : 
    4169          24 :         ctx->blob->active.num_allocated_clusters = 0;
    4170             : 
    4171          24 :         ctx->blob->md_ro = false;
    4172             : 
    4173          24 :         blob_set_thin_provision(ctx->blob);
    4174             :         /* Save the id now: the delete in bs_delete_corrupted_close_cb uses ctx->blobid, not the handle */
    4175          24 :         ctx->blobid = ctx->blob->id;
    4176             : 
    4177          24 :         spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
    4178             : }
    4179             : /*
                     :  * Close callback for the clone opened in bs_examine_clone(), used when the
                     :  * corrupted snapshot (ctx->blob) is to be kept.
                     :  *
                     :  * cb_arg is the struct spdk_bs_load_ctx. If closing the clone failed, the
                     :  * error is logged and iteration moves on. Otherwise md_ro is cleared, the
                     :  * SNAPSHOT_PENDING_REMOVAL and SNAPSHOT_IN_PROGRESS xattrs are removed, the
                     :  * blob is set read only, it is reported to the user iteration callback (if
                     :  * any), added to the snapshot list, and iteration continues.
                     :  *
                     :  * The return values of blob_remove_xattr(), spdk_blob_set_read_only() and
                     :  * bs_blob_list_add() are not checked.
                     :  */
    4180             : static void
    4181          12 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
    4182             : {
    4183          12 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4184             : 
    4185          12 :         if (bserrno != 0) {
    4186           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4187           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4188           0 :                 return;
    4189             :         }
    4190             : 
    4191          12 :         ctx->blob->md_ro = false;
    4192          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
    4193          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
    4194          12 :         spdk_blob_set_read_only(ctx->blob);
    4195             : 
    4196          12 :         if (ctx->iter_cb_fn) {
    4197           0 :                 ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
    4198             :         }
    4199          12 :         bs_blob_list_add(ctx->blob);
    4200             : 
    4201          12 :         spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4202             : }
    4203             : /*
                     :  * Open callback for the clone referenced by a corrupted blob's xattr.
                     :  *
                     :  * cb_arg is the struct spdk_bs_load_ctx; blob is the opened clone and
                     :  * ctx->blob is the corrupted blob under examination. If the open failed, the
                     :  * error is logged and iteration moves on. Otherwise the clone is closed again
                     :  * in both cases, and the close callback decides the outcome:
                     :  * bs_update_corrupted_blob (keep) when the clone's parent_id is ctx->blob's
                     :  * id, bs_delete_corrupted_blob (remove) when it is not.
                     :  */
    4204             : static void
    4205          36 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
    4206             : {
    4207          36 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4208             : 
    4209          36 :         if (bserrno != 0) {
    4210           0 :                 SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
    4211           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4212           0 :                 return;
    4213             :         }
    4214             : 
    4215          36 :         if (blob->parent_id == ctx->blob->id) {
    4216             :                 /* Power failure occurred before updating clone (snapshot delete case)
    4217             :                  * or after updating clone (creating snapshot case) - keep snapshot */
    4218          12 :                 spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
    4219             :         } else {
    4220             :                 /* Power failure occurred after updating clone (snapshot delete case)
    4221             :                  * or before updating clone (creating snapshot case) - remove snapshot */
    4222          24 :                 spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
    4223             :         }
    4224             : }
    4225             : /*
                     :  * Per-blob callback of the blob iteration performed while loading.
                     :  *
                     :  * arg is the struct spdk_bs_load_ctx; blob is the blob just opened and
                     :  * bserrno the result of opening it.
                     :  *
                     :  *  - bserrno == 0: if the blob carries neither the SNAPSHOT_PENDING_REMOVAL
                     :  *    nor the SNAPSHOT_IN_PROGRESS xattr, it is reported to the user iteration
                     :  *    callback (if any), added to the snapshot list, and iteration continues.
                     :  *    If either xattr is present, its value is read as a blob id and that blob
                     :  *    is opened with bs_examine_clone as the callback.
                     :  *  - bserrno == -ENOENT: end of iteration, treated as success.
                     :  *  - any other error: logged and passed on as the final status.
                     :  *
                     :  * In the last two cases the super block and mask buffers are freed, ctx->seq
                     :  * is completed with the resulting status, and ctx is freed.
                     :  */
    4226             : static void
    4227         720 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
    4228             : {
    4229         720 :         struct spdk_bs_load_ctx *ctx = arg;
    4230         720 :         const void *value;
    4231         720 :         size_t len;
    4232         720 :         int rc = 0;
    4233             : 
    4234         720 :         if (bserrno == 0) {
    4235             :                 /* Examine blob if it is corrupted after power failure. Fix
    4236             :                  * the ones that can be fixed and remove any other corrupted
    4237             :                  * ones. If it is not corrupted just process it */
    4238         440 :                 rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
    4239         440 :                 if (rc != 0) {
    4240         420 :                         rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
    4241         420 :                         if (rc != 0) {
    4242             :                                 /* Not corrupted - process it and continue with iterating through blobs */
    4243         404 :                                 if (ctx->iter_cb_fn) {
    4244          34 :                                         ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
    4245             :                                 }
    4246         404 :                                 bs_blob_list_add(blob);
    4247         404 :                                 spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
    4248         404 :                                 return;
    4249             :                         }
    4250             : 
    4251             :                 }
    4252             :                 /* One of the two xattrs was found; value and len describe its contents */
    4253          36 :                 assert(len == sizeof(spdk_blob_id));
    4254             : 
    4255          36 :                 ctx->blob = blob;
    4256             : 
    4257             :                 /* Open clone to check if we are able to fix this blob or should we remove it */
    4258          36 :                 spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
    4259          36 :                 return;
    4260         280 :         } else if (bserrno == -ENOENT) {
    4261         280 :                 bserrno = 0;
    4262             :         } else {
    4263             :                 /*
    4264             :                  * This case needs to be looked at further.  Same problem
    4265             :                  *  exists with applications that rely on explicit blob
    4266             :                  *  iteration.  We should just skip the blob that failed
    4267             :                  *  to load and continue on to the next one.
    4268             :                  */
    4269           0 :                 SPDK_ERRLOG("Error in iterating blobs\n");
    4270             :         }
    4271             :         /* Iteration is over (cleanly or with an error): release the context and complete the sequence */
    4272         280 :         ctx->iter_cb_fn = NULL;
    4273             : 
    4274         280 :         spdk_free(ctx->super);
    4275         280 :         spdk_free(ctx->mask);
    4276         280 :         bs_sequence_finish(ctx->seq, bserrno);
    4277         280 :         free(ctx);
    4278             : }
    4279             : 
    4280             : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
    4281             : 
    4282             : static void
    4283         280 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
    4284             : {
    4285         280 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    4286         280 :         if (ctx->dumping) {
    4287           0 :                 bs_dump_read_md_page(ctx->seq, ctx);
    4288           0 :                 return;
    4289             :         }
    4290         280 :         spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
    4291             : }
    4292             : 
    4293             : static void
    4294         174 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4295             : {
    4296         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4297             :         int rc;
    4298             : 
    4299             :         /* The type must be correct */
    4300         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
    4301             : 
    4302             :         /* The length of the mask (in bits) must not be greater than
    4303             :          * the length of the buffer (converted to bits) */
    4304         174 :         assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
    4305             : 
    4306             :         /* The length of the mask must be exactly equal to the size
    4307             :          * (in pages) of the metadata region */
    4308         174 :         assert(ctx->mask->length == ctx->super->md_len);
    4309             : 
    4310         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
    4311         174 :         if (rc < 0) {
    4312           0 :                 spdk_free(ctx->mask);
    4313           0 :                 bs_load_ctx_fail(ctx, rc);
    4314           0 :                 return;
    4315             :         }
    4316             : 
    4317         174 :         spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4318         174 :         bs_load_complete(ctx);
    4319             : }
    4320             : 
    4321             : static void
    4322         174 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4323             : {
    4324         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4325             :         uint64_t                lba, lba_count, mask_size;
    4326             :         int                     rc;
    4327             : 
    4328         174 :         if (bserrno != 0) {
    4329           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4330           0 :                 return;
    4331             :         }
    4332             : 
    4333             :         /* The type must be correct */
    4334         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    4335             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4336         174 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    4337             :                                              struct spdk_blob_md_page) * 8));
    4338             :         /*
    4339             :          * The length of the mask must be equal to or larger than the total number of clusters. It may be
    4340             :          * larger than the total number of clusters due to a failure spdk_bs_grow.
    4341             :          */
    4342         174 :         assert(ctx->mask->length >= ctx->bs->total_clusters);
    4343         174 :         if (ctx->mask->length > ctx->bs->total_clusters) {
    4344           4 :                 SPDK_WARNLOG("Shrink the used_custers mask length to total_clusters");
    4345           4 :                 ctx->mask->length = ctx->bs->total_clusters;
    4346             :         }
    4347             : 
    4348         174 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
    4349         174 :         if (rc < 0) {
    4350           0 :                 spdk_free(ctx->mask);
    4351           0 :                 bs_load_ctx_fail(ctx, rc);
    4352           0 :                 return;
    4353             :         }
    4354             : 
    4355         174 :         spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
    4356         174 :         ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
    4357         174 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    4358             : 
    4359         174 :         spdk_free(ctx->mask);
    4360             : 
    4361             :         /* Read the used blobids mask */
    4362         174 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4363         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4364             :                                  SPDK_MALLOC_DMA);
    4365         174 :         if (!ctx->mask) {
    4366           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4367           0 :                 return;
    4368             :         }
    4369         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4370         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4371         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4372             :                              bs_load_used_blobids_cpl, ctx);
    4373             : }
    4374             : 
    4375             : static void
    4376         174 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4377             : {
    4378         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4379             :         uint64_t                lba, lba_count, mask_size;
    4380             :         int                     rc;
    4381             : 
    4382         174 :         if (bserrno != 0) {
    4383           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4384           0 :                 return;
    4385             :         }
    4386             : 
    4387             :         /* The type must be correct */
    4388         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
    4389             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4390         174 :         assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE *
    4391             :                                      8));
    4392             :         /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
    4393         174 :         if (ctx->mask->length != ctx->super->md_len) {
    4394           0 :                 SPDK_ERRLOG("mismatched md_len in used_pages mask: "
    4395             :                             "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
    4396             :                             ctx->mask->length, ctx->super->md_len);
    4397           0 :                 assert(false);
    4398             :         }
    4399             : 
    4400         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
    4401         174 :         if (rc < 0) {
    4402           0 :                 spdk_free(ctx->mask);
    4403           0 :                 bs_load_ctx_fail(ctx, rc);
    4404           0 :                 return;
    4405             :         }
    4406             : 
    4407         174 :         spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4408         174 :         spdk_free(ctx->mask);
    4409             : 
    4410             :         /* Read the used clusters mask */
    4411         174 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4412         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4413             :                                  SPDK_MALLOC_DMA);
    4414         174 :         if (!ctx->mask) {
    4415           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4416           0 :                 return;
    4417             :         }
    4418         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4419         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4420         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4421             :                              bs_load_used_clusters_cpl, ctx);
    4422             : }
    4423             : 
    4424             : static void
    4425         174 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
    4426             : {
    4427             :         uint64_t lba, lba_count, mask_size;
    4428             : 
    4429             :         /* Read the used pages mask */
    4430         174 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4431         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4432             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4433         174 :         if (!ctx->mask) {
    4434           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4435           0 :                 return;
    4436             :         }
    4437             : 
    4438         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4439         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4440         174 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    4441             :                              bs_load_used_pages_cpl, ctx);
    4442             : }
    4443             : 
    4444             : static int
    4445         246 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
    4446             : {
    4447         246 :         struct spdk_blob_store *bs = ctx->bs;
    4448             :         struct spdk_blob_md_descriptor *desc;
    4449         246 :         size_t  cur_desc = 0;
    4450             : 
    4451         246 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4452         718 :         while (cur_desc < sizeof(page->descriptors)) {
    4453         718 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    4454         226 :                         if (desc->length == 0) {
    4455             :                                 /* If padding and length are 0, this terminates the page */
    4456         226 :                                 break;
    4457             :                         }
    4458         492 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    4459             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    4460             :                         unsigned int                            i, j;
    4461          68 :                         unsigned int                            cluster_count = 0;
    4462             :                         uint32_t                                cluster_idx;
    4463             : 
    4464          68 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    4465             : 
    4466         136 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    4467         828 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
    4468         760 :                                         cluster_idx = desc_extent_rle->extents[i].cluster_idx;
    4469             :                                         /*
    4470             :                                          * cluster_idx = 0 means an unallocated cluster - don't mark that
    4471             :                                          * in the used cluster map.
    4472             :                                          */
    4473         760 :                                         if (cluster_idx != 0) {
    4474         540 :                                                 SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
    4475         540 :                                                 spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
    4476         540 :                                                 if (bs->num_free_clusters == 0) {
    4477           0 :                                                         return -ENOSPC;
    4478             :                                                 }
    4479         540 :                                                 bs->num_free_clusters--;
    4480             :                                         }
    4481         760 :                                         cluster_count++;
    4482             :                                 }
    4483             :                         }
    4484          68 :                         if (cluster_count == 0) {
    4485           0 :                                 return -EINVAL;
    4486             :                         }
    4487         424 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4488             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    4489             :                         uint32_t                                        i;
    4490          52 :                         uint32_t                                        cluster_count = 0;
    4491             :                         uint32_t                                        cluster_idx;
    4492             :                         size_t                                          cluster_idx_length;
    4493             : 
    4494          52 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    4495          52 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
    4496             : 
    4497          52 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
    4498          52 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
    4499           0 :                                 return -EINVAL;
    4500             :                         }
    4501             : 
    4502         652 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
    4503         600 :                                 cluster_idx = desc_extent->cluster_idx[i];
    4504             :                                 /*
    4505             :                                  * cluster_idx = 0 means an unallocated cluster - don't mark that
    4506             :                                  * in the used cluster map.
    4507             :                                  */
    4508         600 :                                 if (cluster_idx != 0) {
    4509         600 :                                         if (cluster_idx < desc_extent->start_cluster_idx &&
    4510           0 :                                             cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
    4511           0 :                                                 return -EINVAL;
    4512             :                                         }
    4513         600 :                                         spdk_bit_array_set(ctx->used_clusters, cluster_idx);
    4514         600 :                                         if (bs->num_free_clusters == 0) {
    4515           0 :                                                 return -ENOSPC;
    4516             :                                         }
    4517         600 :                                         bs->num_free_clusters--;
    4518             :                                 }
    4519         600 :                                 cluster_count++;
    4520             :                         }
    4521             : 
    4522          52 :                         if (cluster_count == 0) {
    4523           0 :                                 return -EINVAL;
    4524             :                         }
    4525         372 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    4526             :                         /* Skip this item */
    4527         296 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    4528             :                         /* Skip this item */
    4529         236 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    4530             :                         /* Skip this item */
    4531          82 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    4532             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
    4533          82 :                         uint32_t num_extent_pages = ctx->num_extent_pages;
    4534             :                         uint32_t i;
    4535             :                         size_t extent_pages_length;
    4536             :                         void *tmp;
    4537             : 
    4538          82 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
    4539          82 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
    4540             : 
    4541          82 :                         if (desc_extent_table->length == 0 ||
    4542          82 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
    4543           0 :                                 return -EINVAL;
    4544             :                         }
    4545             : 
    4546         160 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4547          78 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
    4548          52 :                                         if (desc_extent_table->extent_page[i].num_pages != 1) {
    4549           0 :                                                 return -EINVAL;
    4550             :                                         }
    4551          52 :                                         num_extent_pages += 1;
    4552             :                                 }
    4553             :                         }
    4554             : 
    4555          82 :                         if (num_extent_pages > 0) {
    4556          52 :                                 tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
    4557          52 :                                 if (tmp == NULL) {
    4558           0 :                                         return -ENOMEM;
    4559             :                                 }
    4560          52 :                                 ctx->extent_page_num = tmp;
    4561             : 
    4562             :                                 /* Extent table entries contain md page numbers for extent pages.
    4563             :                                  * Zeroes represent unallocated extent pages, those are run-length-encoded.
    4564             :                                  */
    4565         104 :                                 for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4566          52 :                                         if (desc_extent_table->extent_page[i].page_idx != 0) {
    4567          52 :                                                 ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
    4568          52 :                                                 ctx->num_extent_pages += 1;
    4569             :                                         }
    4570             :                                 }
    4571             :                         }
    4572             :                 } else {
    4573             :                         /* Error */
    4574           0 :                         return -EINVAL;
    4575             :                 }
    4576             :                 /* Advance to the next descriptor */
    4577         492 :                 cur_desc += sizeof(*desc) + desc->length;
    4578         492 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    4579          20 :                         break;
    4580             :                 }
    4581         472 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    4582             :         }
    4583         246 :         return 0;
    4584             : }
    4585             : 
    4586             : static bool
    4587        1296 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
    4588             : {
    4589             :         uint32_t crc;
    4590        1296 :         struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4591             :         size_t desc_len;
    4592             : 
    4593        1296 :         crc = blob_md_page_calc_crc(page);
    4594        1296 :         if (crc != page->crc) {
    4595           0 :                 return false;
    4596             :         }
    4597             : 
    4598             :         /* Extent page should always be of sequence num 0. */
    4599        1296 :         if (page->sequence_num != 0) {
    4600          44 :                 return false;
    4601             :         }
    4602             : 
    4603             :         /* Descriptor type must be EXTENT_PAGE. */
    4604        1252 :         if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4605         154 :                 return false;
    4606             :         }
    4607             : 
    4608             :         /* Descriptor length cannot exceed the page. */
    4609        1098 :         desc_len = sizeof(*desc) + desc->length;
    4610        1098 :         if (desc_len > sizeof(page->descriptors)) {
    4611           0 :                 return false;
    4612             :         }
    4613             : 
    4614             :         /* It has to be the only descriptor in the page. */
    4615        1098 :         if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
    4616        1098 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
    4617        1098 :                 if (desc->length != 0) {
    4618           0 :                         return false;
    4619             :                 }
    4620             :         }
    4621             : 
    4622        1098 :         return true;
    4623             : }
    4624             : 
    4625             : static bool
    4626        6754 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
    4627             : {
    4628             :         uint32_t crc;
    4629        6754 :         struct spdk_blob_md_page *page = ctx->page;
    4630             : 
    4631        6754 :         crc = blob_md_page_calc_crc(page);
    4632        6754 :         if (crc != page->crc) {
    4633        6538 :                 return false;
    4634             :         }
    4635             : 
    4636             :         /* First page of a sequence should match the blobid. */
    4637         216 :         if (page->sequence_num == 0 &&
    4638         172 :             bs_page_to_blobid(ctx->cur_page) != page->id) {
    4639          18 :                 return false;
    4640             :         }
    4641         198 :         assert(bs_load_cur_extent_page_valid(page) == false);
    4642             : 
    4643         198 :         return true;
    4644             : }
    4645             : 
    4646             : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
    4647             : 
    4648             : static void
    4649         106 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4650             : {
    4651         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4652             : 
    4653         106 :         if (bserrno != 0) {
    4654           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4655           0 :                 return;
    4656             :         }
    4657             : 
    4658         106 :         bs_load_complete(ctx);
    4659             : }
    4660             : 
    4661             : static void
    4662         106 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4663             : {
    4664         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4665             : 
    4666         106 :         spdk_free(ctx->mask);
    4667         106 :         ctx->mask = NULL;
    4668             : 
    4669         106 :         if (bserrno != 0) {
    4670           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4671           0 :                 return;
    4672             :         }
    4673             : 
    4674         106 :         bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
    4675             : }
    4676             : 
    4677             : static void
    4678         106 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4679             : {
    4680         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4681             : 
    4682         106 :         spdk_free(ctx->mask);
    4683         106 :         ctx->mask = NULL;
    4684             : 
    4685         106 :         if (bserrno != 0) {
    4686           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4687           0 :                 return;
    4688             :         }
    4689             : 
    4690         106 :         bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
    4691             : }
    4692             : 
    4693             : static void
    4694         106 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
    4695             : {
    4696         106 :         bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
    4697         106 : }
    4698             : 
    4699             : static void
    4700        6714 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
    4701             : {
    4702             :         uint64_t num_md_clusters;
    4703             :         uint64_t i;
    4704             : 
    4705        6714 :         ctx->in_page_chain = false;
    4706             : 
    4707             :         do {
    4708        6784 :                 ctx->page_index++;
    4709        6784 :         } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
    4710             : 
    4711        6714 :         if (ctx->page_index < ctx->super->md_len) {
    4712        6608 :                 ctx->cur_page = ctx->page_index;
    4713        6608 :                 bs_load_replay_cur_md_page(ctx);
    4714             :         } else {
    4715             :                 /* Claim all of the clusters used by the metadata */
    4716         106 :                 num_md_clusters = spdk_divide_round_up(
    4717         106 :                                           ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
    4718         480 :                 for (i = 0; i < num_md_clusters; i++) {
    4719         374 :                         spdk_bit_array_set(ctx->used_clusters, i);
    4720             :                 }
    4721         106 :                 ctx->bs->num_free_clusters -= num_md_clusters;
    4722         106 :                 spdk_free(ctx->page);
    4723         106 :                 bs_load_write_used_md(ctx);
    4724             :         }
    4725        6714 : }
    4726             : 
    4727             : static void
    4728          52 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4729             : {
                               /*
                                * Batch completion for the reads queued by bs_load_replay_extent_pages().
                                * Each buffer in ctx->extent_pages is validated, its page number is set in
                                * bs->used_md_pages, and it is handed to bs_load_replay_md_parse_page().
                                * A read error, an invalid page or a parse failure ends the load through
                                * bs_load_ctx_fail(); otherwise the buffers are released and
                                * bs_load_replay_md_chain_cpl() is called. seq is not referenced here.
                                *
                                * NOTE(review): the three failure paths free ctx->extent_pages but not
                                * ctx->extent_page_num - confirm that bs_load_ctx_fail() releases it.
                                */
    4730          52 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4731             :         uint32_t page_num;
    4732             :         uint64_t i;
    4733             : 
    4734          52 :         if (bserrno != 0) {
    4735           0 :                 spdk_free(ctx->extent_pages);
    4736           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4737           0 :                 return;
    4738             :         }
    4739             : 
    4740         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4741             :                 /* Extent pages are only read when present within the md chain.
    4742             :                  * Integrity of md is not right if that page was not a valid extent page. */
    4743          52 :                 if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
    4744           0 :                         spdk_free(ctx->extent_pages);
    4745           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4746           0 :                         return;
    4747             :                 }
    4748             : 
                                       /* extent_page_num[i] is the md page index that extent_pages[i] was read from. */
    4749          52 :                 page_num = ctx->extent_page_num[i];
    4750          52 :                 spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
    4751          52 :                 if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
    4752           0 :                         spdk_free(ctx->extent_pages);
    4753           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4754           0 :                         return;
    4755             :                 }
    4756             :         }
    4757             : 
                               /* Release both arrays and reset the bookkeeping before continuing. */
    4758          52 :         spdk_free(ctx->extent_pages);
    4759          52 :         free(ctx->extent_page_num);
    4760          52 :         ctx->extent_page_num = NULL;
    4761          52 :         ctx->num_extent_pages = 0;
    4762             : 
    4763          52 :         bs_load_replay_md_chain_cpl(ctx);
    4764             : }
    4765             : 
    4766             : static void
    4767          52 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
    4768             : {
                               /*
                                * Allocates SPDK_BS_PAGE_SIZE bytes per entry of ctx->extent_page_num and
                                * queues one read per entry on a batch whose completion callback is
                                * bs_load_replay_extent_page_cpl(). An allocation failure is reported
                                * through bs_load_ctx_fail(ctx, -ENOMEM).
                                */
    4769             :         spdk_bs_batch_t *batch;
    4770             :         uint32_t page;
    4771             :         uint64_t lba;
    4772             :         uint64_t i;
    4773             : 
    4774          52 :         ctx->extent_pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE * ctx->num_extent_pages, 0,
    4775             :                                          NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4776          52 :         if (!ctx->extent_pages) {
    4777           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4778           0 :                 return;
    4779             :         }
    4780             : 
    4781          52 :         batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
    4782             : 
    4783         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4784          52 :                 page = ctx->extent_page_num[i];
                                       /* NOTE(review): the index is range-checked by assert() only, which is
                                        * compiled out under NDEBUG; presumably it was validated when the
                                        * page number was collected - confirm. */
    4785          52 :                 assert(page < ctx->super->md_len);
    4786          52 :                 lba = bs_md_page_to_lba(ctx->bs, page);
    4787          52 :                 bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
    4788          52 :                                   bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE));
    4789             :         }
    4790             : 
    4791          52 :         bs_batch_close(batch);
    4792             : }
    4793             : 
    4794             : static void
    4795        6754 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4796             : {
                               /*
                                * Read completion for the metadata page ctx->cur_page, now held in ctx->page.
                                * A page is replayed only if bs_load_cur_md_page_valid() accepts it and it
                                * either has sequence_num 0 or is reached while following a chain
                                * (ctx->in_page_chain). After replaying, the function either reads
                                * page->next, or reads the collected extent pages, or falls through to
                                * bs_load_replay_md_chain_cpl(). seq is not referenced here.
                                */
    4797        6754 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4798             :         uint32_t page_num;
    4799             :         struct spdk_blob_md_page *page;
    4800             : 
    4801        6754 :         if (bserrno != 0) {
    4802           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4803           0 :                 return;
    4804             :         }
    4805             : 
    4806        6754 :         page_num = ctx->cur_page;
    4807        6754 :         page = ctx->page;
    4808        6754 :         if (bs_load_cur_md_page_valid(ctx) == true) {
    4809         198 :                 if (page->sequence_num == 0 || ctx->in_page_chain == true) {
                                               /* The claim is made with bs->used_lock held. */
    4810         194 :                         spdk_spin_lock(&ctx->bs->used_lock);
    4811         194 :                         bs_claim_md_page(ctx->bs, page_num);
    4812         194 :                         spdk_spin_unlock(&ctx->bs->used_lock);
                                               /* Only sequence_num 0 pages register a blob id; presumably these
                                                * are the head pages of a blob's md chain - confirm. */
    4813         194 :                         if (page->sequence_num == 0) {
    4814         154 :                                 SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
    4815         154 :                                 spdk_bit_array_set(ctx->bs->used_blobids, page_num);
    4816             :                         }
    4817         194 :                         if (bs_load_replay_md_parse_page(ctx, page)) {
    4818           0 :                                 bs_load_ctx_fail(ctx, -EILSEQ);
    4819           0 :                                 return;
    4820             :                         }
                                               /* Follow the chain: re-read into the same ctx->page buffer. */
    4821         194 :                         if (page->next != SPDK_INVALID_MD_PAGE) {
    4822          40 :                                 ctx->in_page_chain = true;
    4823          40 :                                 ctx->cur_page = page->next;
    4824          40 :                                 bs_load_replay_cur_md_page(ctx);
    4825          40 :                                 return;
    4826             :                         }
                                               /* End of chain: read any extent pages before completing the chain. */
    4827         154 :                         if (ctx->num_extent_pages != 0) {
    4828          52 :                                 bs_load_replay_extent_pages(ctx);
    4829          52 :                                 return;
    4830             :                         }
    4831             :                 }
    4832             :         }
    4833        6662 :         bs_load_replay_md_chain_cpl(ctx);
    4834             : }
    4835             : 
    4836             : static void
    4837        6754 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
    4838             : {
                               /*
                                * Issues a read of metadata page ctx->cur_page into ctx->page on ctx->seq;
                                * bs_load_replay_md_cpl() runs when the read completes. The page index is
                                * range-checked against super->md_len by assert() only.
                                */
    4839             :         uint64_t lba;
    4840             : 
    4841        6754 :         assert(ctx->cur_page < ctx->super->md_len);
    4842        6754 :         lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
    4843        6754 :         bs_sequence_read_dev(ctx->seq, ctx->page, lba,
    4844        6754 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    4845             :                              bs_load_replay_md_cpl, ctx);
    4846        6754 : }
    4847             : 
    4848             : static void
    4849         106 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
    4850             : {
                               /*
                                * Starts the metadata replay: resets ctx->page_index and ctx->cur_page to 0,
                                * allocates the single SPDK_BS_PAGE_SIZE buffer ctx->page that the replay
                                * reads into, and reads the first page. Allocation failure is reported
                                * through bs_load_ctx_fail(ctx, -ENOMEM).
                                */
    4851         106 :         ctx->page_index = 0;
    4852         106 :         ctx->cur_page = 0;
    4853         106 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    4854             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4855         106 :         if (!ctx->page) {
    4856           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4857           0 :                 return;
    4858             :         }
    4859         106 :         bs_load_replay_cur_md_page(ctx);
    4860             : }
    4861             : 
    4862             : static void
    4863         106 : bs_recover(struct spdk_bs_load_ctx *ctx)
    4864             : {
                               /*
                                * Prepares for recovery and starts the metadata replay: sizes
                                * used_md_pages, used_blobids and open_blobids to super->md_len and
                                * used_clusters to bs->total_clusters, marks every cluster free, then
                                * calls bs_load_replay_md(). Any resize failure is reported as -ENOMEM
                                * regardless of the value in rc.
                                *
                                * NOTE(review): bs_parse_super(), which bs_load_super_cpl() runs before
                                * this function, already resizes used_clusters and open_blobids to the
                                * same sizes; the repeat here looks redundant - confirm.
                                */
    4865             :         int             rc;
    4866             : 
    4867         106 :         SPDK_NOTICELOG("Performing recovery on blobstore\n");
    4868         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
    4869         106 :         if (rc < 0) {
    4870           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4871           0 :                 return;
    4872             :         }
    4873             : 
    4874         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
    4875         106 :         if (rc < 0) {
    4876           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4877           0 :                 return;
    4878             :         }
    4879             : 
    4880         106 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4881         106 :         if (rc < 0) {
    4882           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4883           0 :                 return;
    4884             :         }
    4885             : 
    4886         106 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
    4887         106 :         if (rc < 0) {
    4888           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4889           0 :                 return;
    4890             :         }
    4891             : 
                               /* Start from "everything free"; the replay is expected to account for
                                * used clusters as it parses metadata - confirm against the parse code. */
    4892         106 :         ctx->bs->num_free_clusters = ctx->bs->total_clusters;
    4893         106 :         bs_load_replay_md(ctx);
    4894             : }
    4895             : 
    4896             : static int
    4897         276 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
    4898             : {
                               /*
                                * Populates ctx->bs from the super block in ctx->super and sizes the
                                * used_clusters and open_blobids bit arrays.
                                * Returns 0 on success, -ENOMEM if either bit array resize fails.
                                *
                                * NOTE(review): this divides by super->cluster_size and by
                                * bs->pages_per_cluster. bs_load_super_cpl() calls bs_super_validate()
                                * first; confirm that it rejects values that would make either zero.
                                */
    4899             :         int rc;
    4900             : 
                               /* A zero size in the super block is replaced, in place, by the device size. */
    4901         276 :         if (ctx->super->size == 0) {
    4902           8 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    4903             :         }
    4904             : 
                               /* A zero io_unit_size is replaced, in place, by SPDK_BS_PAGE_SIZE. */
    4905         276 :         if (ctx->super->io_unit_size == 0) {
    4906           8 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    4907             :         }
    4908             : 
    4909         276 :         ctx->bs->clean = 1;
    4910         276 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    4911         276 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    4912         276 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
                               /* pages_per_cluster_shift is written only for power-of-two values;
                                * otherwise it keeps whatever value it already had. */
    4913         276 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    4914         276 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    4915             :         }
    4916         276 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    4917         276 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4918         276 :         if (rc < 0) {
    4919           0 :                 return -ENOMEM;
    4920             :         }
    4921         276 :         ctx->bs->md_start = ctx->super->md_start;
    4922         276 :         ctx->bs->md_len = ctx->super->md_len;
    4923         276 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    4924         276 :         if (rc < 0) {
    4925           0 :                 return -ENOMEM;
    4926             :         }
    4927             : 
                               /* Data clusters = all clusters minus those covering pages [0, md_start + md_len). */
    4928         552 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    4929         276 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    4930         276 :         ctx->bs->super_blob = ctx->super->super_blob;
    4931         276 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    4932             : 
    4933         276 :         return 0;
    4934             : }
    4935             : 
    4936             : static void
    4937         300 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4938             : {
                               /*
                                * Completion for the super block read issued by spdk_bs_load(). Validates
                                * and parses the super block, then either runs recovery or continues the
                                * normal load with bs_load_read_used_pages().
                                *
                                * NOTE(review): bserrno (and seq) are never examined here, so a failed
                                * read is only caught if bs_super_validate() rejects the buffer contents -
                                * confirm this is intentional.
                                */
    4939         300 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4940             :         int rc;
    4941             : 
    4942         300 :         rc = bs_super_validate(ctx->super, ctx->bs);
    4943         300 :         if (rc != 0) {
    4944          24 :                 bs_load_ctx_fail(ctx, rc);
    4945          24 :                 return;
    4946             :         }
    4947             : 
    4948         276 :         rc = bs_parse_super(ctx);
    4949         276 :         if (rc < 0) {
    4950           0 :                 bs_load_ctx_fail(ctx, rc);
    4951           0 :                 return;
    4952             :         }
    4953             : 
                               /* Recover when no used-blobid mask was stored, the super block is not
                                * marked clean, or the caller forced recovery. */
    4954         276 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
    4955         106 :                 bs_recover(ctx);
    4956             :         } else {
    4957         170 :                 bs_load_read_used_pages(ctx);
    4958             :         }
    4959             : }
    4960             : 
    4961             : static inline int
    4962         308 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
    4963             : {
                               /*
                                * Copies into dst every field of src that lies entirely within the first
                                * src->opts_size bytes of struct spdk_bs_opts; fields beyond that size are
                                * left untouched in dst. dst->opts_size is set to src->opts_size.
                                * Returns 0 on success, -1 (after logging) when src->opts_size is zero.
                                */
    4964             : 
    4965         308 :         if (!src->opts_size) {
    4966           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    4967           0 :                 return -1;
    4968             :         }
    4969             : 
                       /* FIELD_OK: true when the field ends at or before src->opts_size. */
    4970             : #define FIELD_OK(field) \
    4971             :         offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
    4972             : 
                       /* SET_FIELD: plain assignment of one field, guarded by FIELD_OK. */
    4973             : #define SET_FIELD(field) \
    4974             :         if (FIELD_OK(field)) { \
    4975             :                 dst->field = src->field; \
    4976             :         } \
    4977             : 
    4978         308 :         SET_FIELD(cluster_sz);
    4979         308 :         SET_FIELD(num_md_pages);
    4980         308 :         SET_FIELD(max_md_ops);
    4981         308 :         SET_FIELD(max_channel_ops);
    4982         308 :         SET_FIELD(clear_method);
    4983             : 
                               /* bstype is copied with memcpy rather than by assignment. */
    4984         308 :         if (FIELD_OK(bstype)) {
    4985         308 :                 memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
    4986             :         }
    4987         308 :         SET_FIELD(iter_cb_fn);
    4988         308 :         SET_FIELD(iter_cb_arg);
    4989         308 :         SET_FIELD(force_recover);
    4990         308 :         SET_FIELD(esnap_bs_dev_create);
    4991         308 :         SET_FIELD(esnap_ctx);
    4992             : 
    4993         308 :         dst->opts_size = src->opts_size;
    4994             : 
    4995             :         /* You should not remove this statement, but need to update the assert statement
    4996             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    4997             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
    4998             : 
    4999             : #undef FIELD_OK
    5000             : #undef SET_FIELD
    5001             : 
    5002         308 :         return 0;
    5003             : }
    5004             : 
    5005             : void
    5006         312 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5007             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5008             : {
                               /*
                                * Begins loading a blobstore from dev. o may be NULL, in which case the
                                * defaults from spdk_bs_opts_init() are used. The result is delivered
                                * through cb_fn(cb_arg, bs, bserrno); the early failures below call it
                                * with a NULL handle. On the success path this function only issues the
                                * super block read; bs_load_super_cpl() continues from there.
                                */
    5009         312 :         struct spdk_blob_store  *bs;
    5010         312 :         struct spdk_bs_cpl      cpl;
    5011         312 :         struct spdk_bs_load_ctx *ctx;
    5012         312 :         struct spdk_bs_opts     opts = {};
    5013             :         int err;
    5014             : 
    5015         312 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    5016             : 
                               /* The device block length must divide SPDK_BS_PAGE_SIZE evenly. */
    5017         312 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5018           4 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    5019           4 :                 dev->destroy(dev);
    5020           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5021           4 :                 return;
    5022             :         }
    5023             : 
    5024         308 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5025         308 :         if (o) {
                                       /* NOTE(review): unlike every other early exit in this function,
                                        * this path returns without dev->destroy(dev) and without calling
                                        * cb_fn, so the caller is never notified - confirm whether that
                                        * is intended. */
    5026         122 :                 if (bs_opts_copy(o, &opts)) {
    5027           0 :                         return;
    5028             :                 }
    5029             :         }
    5030             : 
    5031         308 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    5032           8 :                 dev->destroy(dev);
    5033           8 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5034           8 :                 return;
    5035             :         }
    5036             : 
    5037         300 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5038         300 :         if (err) {
    5039           0 :                 dev->destroy(dev);
    5040           0 :                 cb_fn(cb_arg, NULL, err);
    5041           0 :                 return;
    5042             :         }
    5043             : 
                               /* The sequence completion carries the user callback and the new handle. */
    5044         300 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5045         300 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5046         300 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5047         300 :         cpl.u.bs_handle.bs = bs;
    5048             : 
    5049         300 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5050         300 :         if (!ctx->seq) {
                                       /* dev is not destroyed directly here; presumably bs_free()
                                        * takes care of it - confirm. */
    5051           0 :                 spdk_free(ctx->super);
    5052           0 :                 free(ctx);
    5053           0 :                 bs_free(bs);
    5054           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5055           0 :                 return;
    5056             :         }
    5057             : 
    5058             :         /* Read the super block */
    5059         300 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5060         300 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5061             :                              bs_load_super_cpl, ctx);
    5062             : }
    5063             : 
    5064             : /* END spdk_bs_load */
    5065             : 
    5066             : /* START spdk_bs_dump */
    5067             : 
    5068             : static void
    5069           0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
    5070             : {
                               /*
                                * Tears down a dump operation: frees ctx->super, stashes bserrno and a
                                * copy of the sequence's completion in ctx->bs, finishes the sequence,
                                * then frees the blobstore and ctx.
                                */
    5071           0 :         spdk_free(ctx->super);
    5072             : 
    5073             :         /*
    5074             :          * We need to defer calling bs_call_cpl() until after
    5075             :          * dev destruction, so tuck these away for later use.
    5076             :          */
    5077           0 :         ctx->bs->unload_err = bserrno;
    5078           0 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
                               /* With the type set to NONE, bs_sequence_finish() presumably does not
                                * invoke the user callback itself - confirm. */
    5079           0 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5080             : 
    5081           0 :         bs_sequence_finish(seq, 0);
    5082           0 :         bs_free(ctx->bs);
    5083           0 :         free(ctx);
    5084           0 : }
    5085             : 
    5086             : static void
    5087           0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5088             : {
                               /*
                                * Prints one XATTR or XATTR_INTERNAL descriptor to ctx->fp: the name,
                                * the value as rendered by ctx->print_xattr_fn, and a hex dump of the
                                * value bytes, 16 per row. The value bytes start at
                                * desc_xattr->name + name_length.
                                */
    5089             :         struct spdk_blob_md_descriptor_xattr *desc_xattr;
    5090             :         uint32_t i;
    5091             :         const char *type;
    5092             : 
    5093           0 :         desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
    5094             : 
                               /* NOTE(review): this length consistency check has an empty body, so a
                                * mismatch is detected but neither reported nor acted on. */
    5095           0 :         if (desc_xattr->length !=
    5096             :             sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
    5097           0 :             desc_xattr->name_length + desc_xattr->value_length) {
    5098             :         }
    5099             : 
                               /* NOTE(review): name_length comes from the descriptor and is used as a
                                * copy length and index without a visible bound against the size of
                                * ctx->xattr_name - confirm the buffer is large enough for any value. */
    5100           0 :         memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
    5101           0 :         ctx->xattr_name[desc_xattr->name_length] = '\0';
    5102           0 :         if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5103           0 :                 type = "XATTR";
    5104           0 :         } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5105           0 :                 type = "XATTR_INTERNAL";
    5106             :         } else {
    5107           0 :                 assert(false);
    5108             :                 type = "XATTR_?";
    5109             :         }
    5110           0 :         fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
    5111           0 :         fprintf(ctx->fp, "       value = \"");
    5112           0 :         ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
    5113           0 :                             (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
    5114           0 :                             desc_xattr->value_length);
    5115           0 :         fprintf(ctx->fp, "\"\n");
                               /* Hex dump: indent at the start of each row, newline after every 16th byte. */
    5116           0 :         for (i = 0; i < desc_xattr->value_length; i++) {
    5117           0 :                 if (i % 16 == 0) {
    5118           0 :                         fprintf(ctx->fp, "               ");
    5119             :                 }
    5120           0 :                 fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
    5121           0 :                 if ((i + 1) % 16 == 0) {
    5122           0 :                         fprintf(ctx->fp, "\n");
    5123             :                 }
    5124             :         }
                               /* Terminate a final, partially filled row. */
    5125           0 :         if (i % 16 != 0) {
    5126           0 :                 fprintf(ctx->fp, "\n");
    5127             :         }
    5128           0 : }
    5129             : 
    5130             : struct type_flag_desc {
                               /* One entry of a flag-name table used by bs_dump_print_type_bits():
                                * an entry matches a flags word when (mask & flags) == val. */
    5131             :         uint64_t mask;
    5132             :         uint64_t val;
                               /* Text printed for a matching entry. */
    5133             :         const char *name;
    5134             : };
    5135             : 
    5136             : static void
    5137           0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
    5138             :                         struct type_flag_desc *desc, size_t numflags)
    5139             : {
                               /*
                                * Decodes flags against the numflags entries of desc and prints one line
                                * to ctx->fp per matching entry, followed by a single "Unknown" line for
                                * any set bits that no matching entry's mask covered.
                                */
    5140           0 :         uint64_t covered = 0;
    5141             :         size_t i;
    5142             : 
    5143           0 :         for (i = 0; i < numflags; i++) {
    5144           0 :                 if ((desc[i].mask & flags) != desc[i].val) {
    5145           0 :                         continue;
    5146             :                 }
    5147           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
                                       /* Multi-bit field entries also show the mask they were matched under. */
    5148           0 :                 if (desc[i].mask != desc[i].val) {
    5149           0 :                         fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
    5150           0 :                                 desc[i].mask, desc[i].val);
    5151             :                 }
    5152           0 :                 fprintf(ctx->fp, "\n");
    5153           0 :                 covered |= desc[i].mask;
    5154             :         }
    5155           0 :         if ((flags & ~covered) != 0) {
    5156           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
    5157             :         }
    5158           0 : }
    5159             : 
    5160             : static void
    5161           0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5162             : {
                               /*
                                * Prints a FLAGS descriptor to ctx->fp: the raw invalid, data_ro and
                                * md_ro flag words, each followed by its decoded bits.
                                */
    5163             :         struct spdk_blob_md_descriptor_flags *type_desc;
                       /* ADD_FLAG(f): single-bit entry { mask = f, val = f, name = "f" }. */
    5164             : #define ADD_FLAG(f) { f, f, #f }
                       /* ADD_MASK_VAL(m, v): field entry { mask = m, val = v, name = "v" }. */
    5165             : #define ADD_MASK_VAL(m, v) { m, v, #v }
    5166             :         static struct type_flag_desc invalid[] = {
    5167             :                 ADD_FLAG(SPDK_BLOB_THIN_PROV),
    5168             :                 ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
    5169             :                 ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
    5170             :         };
    5171             :         static struct type_flag_desc data_ro[] = {
    5172             :                 ADD_FLAG(SPDK_BLOB_READ_ONLY),
    5173             :         };
                               /* All md_ro entries share one mask and differ only in the expected value. */
    5174             :         static struct type_flag_desc md_ro[] = {
    5175             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
    5176             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
    5177             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
    5178             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
    5179             :         };
    5180             : #undef ADD_FLAG
    5181             : #undef ADD_MASK_VAL
    5182             : 
    5183           0 :         type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
    5184           0 :         fprintf(ctx->fp, "Flags:\n");
    5185           0 :         fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
    5186           0 :         bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
    5187             :                                 SPDK_COUNTOF(invalid));
    5188           0 :         fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
    5189           0 :         bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
    5190             :                                 SPDK_COUNTOF(data_ro));
    5191           0 :         fprintf(ctx->fp, "\t  md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
    5192           0 :         bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
    5193             :                                 SPDK_COUNTOF(md_ro));
    5194           0 : }
    5195             : 
    5196             : static void
    5197           0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5198             : {
                               /*
                                * Prints an EXTENT_TABLE descriptor to ctx->fp: one line per entry whose
                                * page_idx is non-zero, showing page index, num_pages and the LBA of that
                                * metadata page.
                                */
    5199             :         struct spdk_blob_md_descriptor_extent_table *et_desc;
    5200             :         uint64_t num_extent_pages;
    5201             :         uint32_t et_idx;
    5202             : 
    5203           0 :         et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
                               /* Entry count = payload bytes after num_clusters / size of one entry.
                                * NOTE(review): the subtraction is unsigned; a length smaller than
                                * sizeof(num_clusters) would wrap - confirm length is validated earlier. */
    5204           0 :         num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
    5205             :                            sizeof(et_desc->extent_page[0]);
    5206             : 
    5207           0 :         fprintf(ctx->fp, "Extent table:\n");
    5208           0 :         for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
    5209           0 :                 if (et_desc->extent_page[et_idx].page_idx == 0) {
    5210             :                         /* Zeroes represent unallocated extent pages. */
    5211           0 :                         continue;
    5212             :                 }
    5213           0 :                 fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
    5214             :                         " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
    5215             :                         et_desc->extent_page[et_idx].num_pages,
    5216             :                         bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
    5217             :         }
    5218           0 : }
    5219             : 
    5220             : static void
    5221           0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
    5222             : {
                               /*
                                * Prints the metadata page held in ctx->page (index ctx->cur_page) to
                                * ctx->fp: a header (index, LBA, blob id, sequence, next pointer, bit
                                * array membership, CRC check) followed by every descriptor in the page.
                                *
                                * NOTE(review): the descriptor walk trusts desc->length as read from the
                                * page. The check after each advance only guarantees that the next
                                * descriptor header fits inside page->descriptors; the per-type loops
                                * below are bounded by desc->length alone - confirm whether the payload
                                * is validated elsewhere.
                                */
    5223           0 :         uint32_t page_idx = ctx->cur_page;
    5224           0 :         struct spdk_blob_md_page *page = ctx->page;
    5225             :         struct spdk_blob_md_descriptor *desc;
    5226           0 :         size_t cur_desc = 0;
    5227             :         uint32_t crc;
    5228             : 
    5229           0 :         fprintf(ctx->fp, "=========\n");
    5230           0 :         fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
    5231           0 :         fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
    5232           0 :         fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
    5233           0 :         fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
    5234           0 :         if (page->next == SPDK_INVALID_MD_PAGE) {
    5235           0 :                 fprintf(ctx->fp, "Next: None\n");
    5236             :         } else {
    5237           0 :                 fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
    5238             :         }
                               /* The bit arrays are flagged as dubious when the super block is not clean. */
    5239           0 :         fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
    5240           0 :         if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
    5241           0 :                 fprintf(ctx->fp, " md");
    5242             :         }
    5243           0 :         if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
    5244           0 :                 fprintf(ctx->fp, " blob");
    5245             :         }
    5246           0 :         fprintf(ctx->fp, "\n");
    5247             : 
                               /* Recompute the CRC and report whether it matches the stored one. */
    5248           0 :         crc = blob_md_page_calc_crc(page);
    5249           0 :         fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
    5250             : 
    5251           0 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    5252           0 :         while (cur_desc < sizeof(page->descriptors)) {
    5253           0 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    5254           0 :                         if (desc->length == 0) {
    5255             :                                 /* If padding and length are 0, this terminates the page */
    5256           0 :                                 break;
    5257             :                         }
    5258           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    5259             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    5260             :                         unsigned int                            i;
    5261             : 
    5262           0 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    5263             : 
                                               /* One line per (cluster_idx, length) run; cluster_idx 0 is
                                                * printed as unallocated. */
    5264           0 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    5265           0 :                                 if (desc_extent_rle->extents[i].cluster_idx != 0) {
    5266           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5267             :                                                 desc_extent_rle->extents[i].cluster_idx);
    5268             :                                 } else {
    5269           0 :                                         fprintf(ctx->fp, "Unallocated Extent - ");
    5270             :                                 }
    5271           0 :                                 fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
    5272           0 :                                 fprintf(ctx->fp, "\n");
    5273             :                         }
    5274           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    5275             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    5276             :                         unsigned int                                    i;
    5277             : 
    5278           0 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    5279             : 
                                               /* One line per cluster_idx entry; 0 is printed as unallocated. */
    5280           0 :                         for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
    5281           0 :                                 if (desc_extent->cluster_idx[i] != 0) {
    5282           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5283             :                                                 desc_extent->cluster_idx[i]);
    5284             :                                 } else {
    5285           0 :                                         fprintf(ctx->fp, "Unallocated Extent");
    5286             :                                 }
    5287           0 :                                 fprintf(ctx->fp, "\n");
    5288             :                         }
    5289           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5290           0 :                         bs_dump_print_xattr(ctx, desc);
    5291           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5292           0 :                         bs_dump_print_xattr(ctx, desc);
    5293           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    5294           0 :                         bs_dump_print_type_flags(ctx, desc);
    5295           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    5296           0 :                         bs_dump_print_extent_table(ctx, desc);
    5297             :                 } else {
    5298             :                         /* Error */
    5299           0 :                         fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
    5300             :                 }
    5301             :                 /* Advance to the next descriptor */
    5302           0 :                 cur_desc += sizeof(*desc) + desc->length;
                                       /* Stop when a full descriptor header no longer fits in the page. */
    5303           0 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    5304           0 :                         break;
    5305             :                 }
    5306           0 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    5307             :         }
    5308           0 : }
    5309             : 
    5310             : static void
    5311           0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5312             : {
    5313           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5314             : 
    5315           0 :         if (bserrno != 0) {
    5316           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5317           0 :                 return;
    5318             :         }
    5319             : 
    5320           0 :         if (ctx->page->id != 0) {
    5321           0 :                 bs_dump_print_md_page(ctx);
    5322             :         }
    5323             : 
    5324           0 :         ctx->cur_page++;
    5325             : 
    5326           0 :         if (ctx->cur_page < ctx->super->md_len) {
    5327           0 :                 bs_dump_read_md_page(seq, ctx);
    5328             :         } else {
    5329           0 :                 spdk_free(ctx->page);
    5330           0 :                 bs_dump_finish(seq, ctx, 0);
    5331             :         }
    5332             : }
    5333             : 
    5334             : static void
    5335           0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
    5336             : {
    5337           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5338             :         uint64_t lba;
    5339             : 
    5340           0 :         assert(ctx->cur_page < ctx->super->md_len);
    5341           0 :         lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
    5342           0 :         bs_sequence_read_dev(seq, ctx->page, lba,
    5343           0 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    5344             :                              bs_dump_read_md_page_cpl, ctx);
    5345           0 : }
    5346             : 
    5347             : static void
    5348           0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5349             : {
    5350           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5351             :         int rc;
    5352             : 
    5353           0 :         fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
    5354           0 :         if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5355             :                    sizeof(ctx->super->signature)) != 0) {
    5356           0 :                 fprintf(ctx->fp, "(Mismatch)\n");
    5357           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5358           0 :                 return;
    5359             :         } else {
    5360           0 :                 fprintf(ctx->fp, "(OK)\n");
    5361             :         }
    5362           0 :         fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
    5363           0 :         fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
    5364           0 :                 (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
    5365           0 :         fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
    5366           0 :         fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
    5367           0 :         fprintf(ctx->fp, "Super Blob ID: ");
    5368           0 :         if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
    5369           0 :                 fprintf(ctx->fp, "(None)\n");
    5370             :         } else {
    5371           0 :                 fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
    5372             :         }
    5373           0 :         fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
    5374           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
    5375           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
    5376           0 :         fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
    5377           0 :         fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
    5378           0 :         fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
    5379           0 :         fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
    5380           0 :         fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
    5381           0 :         fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
    5382             : 
    5383           0 :         ctx->cur_page = 0;
    5384           0 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    5385             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5386           0 :         if (!ctx->page) {
    5387           0 :                 bs_dump_finish(seq, ctx, -ENOMEM);
    5388           0 :                 return;
    5389             :         }
    5390             : 
    5391           0 :         rc = bs_parse_super(ctx);
    5392           0 :         if (rc < 0) {
    5393           0 :                 bs_load_ctx_fail(ctx, rc);
    5394           0 :                 return;
    5395             :         }
    5396             : 
    5397           0 :         bs_load_read_used_pages(ctx);
    5398             : }
    5399             : 
    5400             : void
    5401           0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
    5402             :              spdk_bs_op_complete cb_fn, void *cb_arg)
    5403             : {
    5404           0 :         struct spdk_blob_store  *bs;
    5405           0 :         struct spdk_bs_cpl      cpl;
    5406           0 :         struct spdk_bs_load_ctx *ctx;
    5407           0 :         struct spdk_bs_opts     opts = {};
    5408             :         int err;
    5409             : 
    5410           0 :         SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
    5411             : 
    5412           0 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5413             : 
    5414           0 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5415           0 :         if (err) {
    5416           0 :                 dev->destroy(dev);
    5417           0 :                 cb_fn(cb_arg, err);
    5418           0 :                 return;
    5419             :         }
    5420             : 
    5421           0 :         ctx->dumping = true;
    5422           0 :         ctx->fp = fp;
    5423           0 :         ctx->print_xattr_fn = print_xattr_fn;
    5424             : 
    5425           0 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5426           0 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5427           0 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5428             : 
    5429           0 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5430           0 :         if (!ctx->seq) {
    5431           0 :                 spdk_free(ctx->super);
    5432           0 :                 free(ctx);
    5433           0 :                 bs_free(bs);
    5434           0 :                 cb_fn(cb_arg, -ENOMEM);
    5435           0 :                 return;
    5436             :         }
    5437             : 
    5438             :         /* Read the super block */
    5439           0 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5440           0 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5441             :                              bs_dump_super_cpl, ctx);
    5442             : }
    5443             : 
    5444             : /* END spdk_bs_dump */
    5445             : 
    5446             : /* START spdk_bs_init */
    5447             : 
    5448             : static void
    5449         472 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5450             : {
    5451         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5452             : 
    5453         472 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    5454         472 :         spdk_free(ctx->super);
    5455         472 :         free(ctx);
    5456             : 
    5457         472 :         bs_sequence_finish(seq, bserrno);
    5458         472 : }
    5459             : 
    5460             : static void
    5461         472 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5462             : {
    5463         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5464             : 
    5465             :         /* Write super block */
    5466         472 :         bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    5467         472 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    5468             :                               bs_init_persist_super_cpl, ctx);
    5469         472 : }
    5470             : 
    5471             : void
    5472         488 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5473             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5474             : {
    5475         488 :         struct spdk_bs_load_ctx *ctx;
    5476         488 :         struct spdk_blob_store  *bs;
    5477         488 :         struct spdk_bs_cpl      cpl;
    5478             :         spdk_bs_sequence_t      *seq;
    5479             :         spdk_bs_batch_t         *batch;
    5480             :         uint64_t                num_md_lba;
    5481             :         uint64_t                num_md_pages;
    5482             :         uint64_t                num_md_clusters;
    5483             :         uint64_t                max_used_cluster_mask_len;
    5484             :         uint32_t                i;
    5485         488 :         struct spdk_bs_opts     opts = {};
    5486             :         int                     rc;
    5487             :         uint64_t                lba, lba_count;
    5488             : 
    5489         488 :         SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
    5490             : 
    5491         488 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5492           4 :                 SPDK_ERRLOG("unsupported dev block length of %d\n",
    5493             :                             dev->blocklen);
    5494           4 :                 dev->destroy(dev);
    5495           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5496           4 :                 return;
    5497             :         }
    5498             : 
    5499         484 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5500         484 :         if (o) {
    5501         182 :                 if (bs_opts_copy(o, &opts)) {
    5502           0 :                         return;
    5503             :                 }
    5504             :         }
    5505             : 
    5506         484 :         if (bs_opts_verify(&opts) != 0) {
    5507           4 :                 dev->destroy(dev);
    5508           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5509           4 :                 return;
    5510             :         }
    5511             : 
    5512         480 :         rc = bs_alloc(dev, &opts, &bs, &ctx);
    5513         480 :         if (rc) {
    5514           4 :                 dev->destroy(dev);
    5515           4 :                 cb_fn(cb_arg, NULL, rc);
    5516           4 :                 return;
    5517             :         }
    5518             : 
    5519         476 :         if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
    5520             :                 /* By default, allocate 1 page per cluster.
    5521             :                  * Technically, this over-allocates metadata
    5522             :                  * because more metadata will reduce the number
    5523             :                  * of usable clusters. This can be addressed with
    5524             :                  * more complex math in the future.
    5525             :                  */
    5526         468 :                 bs->md_len = bs->total_clusters;
    5527             :         } else {
    5528           8 :                 bs->md_len = opts.num_md_pages;
    5529             :         }
    5530         476 :         rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
    5531         476 :         if (rc < 0) {
    5532           0 :                 spdk_free(ctx->super);
    5533           0 :                 free(ctx);
    5534           0 :                 bs_free(bs);
    5535           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5536           0 :                 return;
    5537             :         }
    5538             : 
    5539         476 :         rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
    5540         476 :         if (rc < 0) {
    5541           0 :                 spdk_free(ctx->super);
    5542           0 :                 free(ctx);
    5543           0 :                 bs_free(bs);
    5544           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5545           0 :                 return;
    5546             :         }
    5547             : 
    5548         476 :         rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
    5549         476 :         if (rc < 0) {
    5550           0 :                 spdk_free(ctx->super);
    5551           0 :                 free(ctx);
    5552           0 :                 bs_free(bs);
    5553           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5554           0 :                 return;
    5555             :         }
    5556             : 
    5557         476 :         memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5558             :                sizeof(ctx->super->signature));
    5559         476 :         ctx->super->version = SPDK_BS_VERSION;
    5560         476 :         ctx->super->length = sizeof(*ctx->super);
    5561         476 :         ctx->super->super_blob = bs->super_blob;
    5562         476 :         ctx->super->clean = 0;
    5563         476 :         ctx->super->cluster_size = bs->cluster_sz;
    5564         476 :         ctx->super->io_unit_size = bs->io_unit_size;
    5565         476 :         memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
    5566             : 
    5567             :         /* Calculate how many pages the metadata consumes at the front
    5568             :          * of the disk.
    5569             :          */
    5570             : 
    5571             :         /* The super block uses 1 page */
    5572         476 :         num_md_pages = 1;
    5573             : 
    5574             :         /* The used_md_pages mask requires 1 bit per metadata page, rounded
    5575             :          * up to the nearest page, plus a header.
    5576             :          */
    5577         476 :         ctx->super->used_page_mask_start = num_md_pages;
    5578         476 :         ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5579         476 :                                          spdk_divide_round_up(bs->md_len, 8),
    5580             :                                          SPDK_BS_PAGE_SIZE);
    5581         476 :         num_md_pages += ctx->super->used_page_mask_len;
    5582             : 
    5583             :         /* The used_clusters mask requires 1 bit per cluster, rounded
    5584             :          * up to the nearest page, plus a header.
    5585             :          */
    5586         476 :         ctx->super->used_cluster_mask_start = num_md_pages;
    5587         476 :         ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5588         476 :                                             spdk_divide_round_up(bs->total_clusters, 8),
    5589             :                                             SPDK_BS_PAGE_SIZE);
    5590             :         /* The blobstore might be extended, then the used_cluster bitmap will need more space.
    5591             :          * Here we calculate the max clusters we can support according to the
    5592             :          * num_md_pages (bs->md_len).
    5593             :          */
    5594         476 :         max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5595         476 :                                     spdk_divide_round_up(bs->md_len, 8),
    5596             :                                     SPDK_BS_PAGE_SIZE);
    5597         476 :         max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
    5598             :                                              ctx->super->used_cluster_mask_len);
    5599         476 :         num_md_pages += max_used_cluster_mask_len;
    5600             : 
    5601             :         /* The used_blobids mask requires 1 bit per metadata page, rounded
    5602             :          * up to the nearest page, plus a header.
    5603             :          */
    5604         476 :         ctx->super->used_blobid_mask_start = num_md_pages;
    5605         476 :         ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5606         476 :                                            spdk_divide_round_up(bs->md_len, 8),
    5607             :                                            SPDK_BS_PAGE_SIZE);
    5608         476 :         num_md_pages += ctx->super->used_blobid_mask_len;
    5609             : 
    5610             :         /* The metadata region size was chosen above */
    5611         476 :         ctx->super->md_start = bs->md_start = num_md_pages;
    5612         476 :         ctx->super->md_len = bs->md_len;
    5613         476 :         num_md_pages += bs->md_len;
    5614             : 
    5615         476 :         num_md_lba = bs_page_to_lba(bs, num_md_pages);
    5616             : 
    5617         476 :         ctx->super->size = dev->blockcnt * dev->blocklen;
    5618             : 
    5619         476 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    5620             : 
    5621         476 :         num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
    5622         476 :         if (num_md_clusters > bs->total_clusters) {
    5623           4 :                 SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, "
    5624             :                             "please decrease number of pages reserved for metadata "
    5625             :                             "or increase cluster size.\n");
    5626           4 :                 spdk_free(ctx->super);
    5627           4 :                 spdk_bit_array_free(&ctx->used_clusters);
    5628           4 :                 free(ctx);
    5629           4 :                 bs_free(bs);
    5630           4 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5631           4 :                 return;
    5632             :         }
    5633             :         /* Claim all of the clusters used by the metadata */
    5634       75700 :         for (i = 0; i < num_md_clusters; i++) {
    5635       75228 :                 spdk_bit_array_set(ctx->used_clusters, i);
    5636             :         }
    5637             : 
    5638         472 :         bs->num_free_clusters -= num_md_clusters;
    5639         472 :         bs->total_data_clusters = bs->num_free_clusters;
    5640             : 
    5641         472 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5642         472 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5643         472 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5644         472 :         cpl.u.bs_handle.bs = bs;
    5645             : 
    5646         472 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5647         472 :         if (!seq) {
    5648           0 :                 spdk_free(ctx->super);
    5649           0 :                 free(ctx);
    5650           0 :                 bs_free(bs);
    5651           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5652           0 :                 return;
    5653             :         }
    5654             : 
    5655         472 :         batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
    5656             : 
    5657             :         /* Clear metadata space */
    5658         472 :         bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
    5659             : 
    5660         472 :         lba = num_md_lba;
    5661         472 :         lba_count = ctx->bs->dev->blockcnt - lba;
    5662         472 :         switch (opts.clear_method) {
    5663         456 :         case BS_CLEAR_WITH_UNMAP:
    5664             :                 /* Trim data clusters */
    5665         456 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    5666         456 :                 break;
    5667           0 :         case BS_CLEAR_WITH_WRITE_ZEROES:
    5668             :                 /* Write_zeroes to data clusters */
    5669           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    5670           0 :                 break;
    5671          16 :         case BS_CLEAR_WITH_NONE:
    5672             :         default:
    5673          16 :                 break;
    5674             :         }
    5675             : 
    5676         472 :         bs_batch_close(batch);
    5677             : }
    5678             : 
    5679             : /* END spdk_bs_init */
    5680             : 
    5681             : /* START spdk_bs_destroy */
    5682             : 
    5683             : static void
    5684           4 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5685             : {
    5686           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5687           4 :         struct spdk_blob_store *bs = ctx->bs;
    5688             : 
    5689             :         /*
    5690             :          * We need to defer calling bs_call_cpl() until after
    5691             :          * dev destruction, so tuck these away for later use.
    5692             :          */
    5693           4 :         bs->unload_err = bserrno;
    5694           4 :         memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5695           4 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5696             : 
    5697           4 :         bs_sequence_finish(seq, bserrno);
    5698             : 
    5699           4 :         bs_free(bs);
    5700           4 :         free(ctx);
    5701           4 : }
    5702             : 
    5703             : void
    5704           4 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
    5705             :                 void *cb_arg)
    5706             : {
    5707           4 :         struct spdk_bs_cpl      cpl;
    5708             :         spdk_bs_sequence_t      *seq;
    5709             :         struct spdk_bs_load_ctx *ctx;
    5710             : 
    5711           4 :         SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
    5712             : 
    5713           4 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5714           0 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5715           0 :                 cb_fn(cb_arg, -EBUSY);
    5716           0 :                 return;
    5717             :         }
    5718             : 
    5719           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5720           4 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5721           4 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5722             : 
    5723           4 :         ctx = calloc(1, sizeof(*ctx));
    5724           4 :         if (!ctx) {
    5725           0 :                 cb_fn(cb_arg, -ENOMEM);
    5726           0 :                 return;
    5727             :         }
    5728             : 
    5729           4 :         ctx->bs = bs;
    5730             : 
    5731           4 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5732           4 :         if (!seq) {
    5733           0 :                 free(ctx);
    5734           0 :                 cb_fn(cb_arg, -ENOMEM);
    5735           0 :                 return;
    5736             :         }
    5737             : 
    5738             :         /* Write zeroes to the super block */
    5739           4 :         bs_sequence_write_zeroes_dev(seq,
    5740             :                                      bs_page_to_lba(bs, 0),
    5741             :                                      bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
    5742             :                                      bs_destroy_trim_cpl, ctx);
    5743             : }
    5744             : 
    5745             : /* END spdk_bs_destroy */
    5746             : 
    5747             : /* START spdk_bs_unload */
    5748             : 
    5749             : static void
    5750         654 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
    5751             : {
    5752         654 :         spdk_bs_sequence_t *seq = ctx->seq;
    5753             : 
    5754         654 :         spdk_free(ctx->super);
    5755             : 
    5756             :         /*
    5757             :          * We need to defer calling bs_call_cpl() until after
    5758             :          * dev destruction, so tuck these away for later use.
    5759             :          */
    5760         654 :         ctx->bs->unload_err = bserrno;
    5761         654 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5762         654 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5763             : 
    5764         654 :         bs_sequence_finish(seq, bserrno);
    5765             : 
    5766         654 :         bs_free(ctx->bs);
    5767         654 :         free(ctx);
    5768         654 : }
    5769             : 
    5770             : static void
    5771         654 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5772             : {
    5773         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5774             : 
    5775         654 :         bs_unload_finish(ctx, bserrno);
    5776         654 : }
    5777             : 
    5778             : static void
    5779         654 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5780             : {
    5781         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5782             : 
    5783         654 :         spdk_free(ctx->mask);
    5784             : 
    5785         654 :         if (bserrno != 0) {
    5786           0 :                 bs_unload_finish(ctx, bserrno);
    5787           0 :                 return;
    5788             :         }
    5789             : 
    5790         654 :         ctx->super->clean = 1;
    5791             : 
    5792         654 :         bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
    5793             : }
    5794             : 
    5795             : static void
    5796         654 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5797             : {
    5798         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5799             : 
    5800         654 :         spdk_free(ctx->mask);
    5801         654 :         ctx->mask = NULL;
    5802             : 
    5803         654 :         if (bserrno != 0) {
    5804           0 :                 bs_unload_finish(ctx, bserrno);
    5805           0 :                 return;
    5806             :         }
    5807             : 
    5808         654 :         bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
    5809             : }
    5810             : 
    5811             : static void
    5812         654 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5813             : {
    5814         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5815             : 
    5816         654 :         spdk_free(ctx->mask);
    5817         654 :         ctx->mask = NULL;
    5818             : 
    5819         654 :         if (bserrno != 0) {
    5820           0 :                 bs_unload_finish(ctx, bserrno);
    5821           0 :                 return;
    5822             :         }
    5823             : 
    5824         654 :         bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
    5825             : }
    5826             : 
    5827             : static void
    5828         654 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5829             : {
    5830         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5831             :         int rc;
    5832             : 
    5833         654 :         if (bserrno != 0) {
    5834           0 :                 bs_unload_finish(ctx, bserrno);
    5835           0 :                 return;
    5836             :         }
    5837             : 
    5838         654 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5839         654 :         if (rc != 0) {
    5840           0 :                 bs_unload_finish(ctx, rc);
    5841           0 :                 return;
    5842             :         }
    5843             : 
    5844         654 :         bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
    5845             : }
    5846             : 
    5847             : void
    5848         662 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
    5849             : {
    5850         662 :         struct spdk_bs_cpl      cpl;
    5851             :         struct spdk_bs_load_ctx *ctx;
    5852             : 
    5853         662 :         SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
    5854             : 
    5855             :         /*
    5856             :          * If external snapshot channels are being destroyed while the blobstore is unloaded, the
    5857             :          * unload is deferred until after the channel destruction completes.
    5858             :          */
    5859         662 :         if (bs->esnap_channels_unloading != 0) {
    5860           4 :                 if (bs->esnap_unload_cb_fn != NULL) {
    5861           0 :                         SPDK_ERRLOG("Blobstore unload in progress\n");
    5862           0 :                         cb_fn(cb_arg, -EBUSY);
    5863           0 :                         return;
    5864             :                 }
    5865           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
    5866             :                               " esnap clones are unloading\n", bs->esnap_channels_unloading);
    5867           4 :                 bs->esnap_unload_cb_fn = cb_fn;
    5868           4 :                 bs->esnap_unload_cb_arg = cb_arg;
    5869           4 :                 return;
    5870             :         }
    5871         658 :         if (bs->esnap_unload_cb_fn != NULL) {
    5872           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
    5873           4 :                 assert(bs->esnap_unload_cb_fn == cb_fn);
    5874           4 :                 assert(bs->esnap_unload_cb_arg == cb_arg);
    5875           4 :                 bs->esnap_unload_cb_fn = NULL;
    5876           4 :                 bs->esnap_unload_cb_arg = NULL;
    5877             :         }
    5878             : 
    5879         658 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5880           4 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5881           4 :                 cb_fn(cb_arg, -EBUSY);
    5882           4 :                 return;
    5883             :         }
    5884             : 
    5885         654 :         ctx = calloc(1, sizeof(*ctx));
    5886         654 :         if (!ctx) {
    5887           0 :                 cb_fn(cb_arg, -ENOMEM);
    5888           0 :                 return;
    5889             :         }
    5890             : 
    5891         654 :         ctx->bs = bs;
    5892             : 
    5893         654 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5894             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5895         654 :         if (!ctx->super) {
    5896           0 :                 free(ctx);
    5897           0 :                 cb_fn(cb_arg, -ENOMEM);
    5898           0 :                 return;
    5899             :         }
    5900             : 
    5901         654 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5902         654 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5903         654 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5904             : 
    5905         654 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5906         654 :         if (!ctx->seq) {
    5907           0 :                 spdk_free(ctx->super);
    5908           0 :                 free(ctx);
    5909           0 :                 cb_fn(cb_arg, -ENOMEM);
    5910           0 :                 return;
    5911             :         }
    5912             : 
    5913             :         /* Read super block */
    5914         654 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5915         654 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5916             :                              bs_unload_read_super_cpl, ctx);
    5917             : }
    5918             : 
    5919             : /* END spdk_bs_unload */
    5920             : 
    5921             : /* START spdk_bs_set_super */
    5922             : 
    /* Context carried through the read/validate/write steps of spdk_bs_set_super(). */
    struct spdk_bs_set_super_ctx {
            /* Blobstore whose super block is being rewritten. */
            struct spdk_blob_store          *bs;
            /* Buffer obtained from spdk_zmalloc() that holds the super block. */
            struct spdk_bs_super_block      *super;
    };
    5927             : 
    5928             : static void
    5929           8 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5930             : {
    5931           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5932             : 
    5933           8 :         if (bserrno != 0) {
    5934           0 :                 SPDK_ERRLOG("Unable to write to super block of blobstore\n");
    5935             :         }
    5936             : 
    5937           8 :         spdk_free(ctx->super);
    5938             : 
    5939           8 :         bs_sequence_finish(seq, bserrno);
    5940             : 
    5941           8 :         free(ctx);
    5942           8 : }
    5943             : 
    5944             : static void
    5945           8 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5946             : {
    5947           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5948             :         int rc;
    5949             : 
    5950           8 :         if (bserrno != 0) {
    5951           0 :                 SPDK_ERRLOG("Unable to read super block of blobstore\n");
    5952           0 :                 spdk_free(ctx->super);
    5953           0 :                 bs_sequence_finish(seq, bserrno);
    5954           0 :                 free(ctx);
    5955           0 :                 return;
    5956             :         }
    5957             : 
    5958           8 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5959           8 :         if (rc != 0) {
    5960           0 :                 SPDK_ERRLOG("Not a valid super block\n");
    5961           0 :                 spdk_free(ctx->super);
    5962           0 :                 bs_sequence_finish(seq, rc);
    5963           0 :                 free(ctx);
    5964           0 :                 return;
    5965             :         }
    5966             : 
    5967           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
    5968             : }
    5969             : 
    5970             : void
    5971           8 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
    5972             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    5973             : {
    5974           8 :         struct spdk_bs_cpl              cpl;
    5975             :         spdk_bs_sequence_t              *seq;
    5976             :         struct spdk_bs_set_super_ctx    *ctx;
    5977             : 
    5978           8 :         SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
    5979             : 
    5980           8 :         ctx = calloc(1, sizeof(*ctx));
    5981           8 :         if (!ctx) {
    5982           0 :                 cb_fn(cb_arg, -ENOMEM);
    5983           0 :                 return;
    5984             :         }
    5985             : 
    5986           8 :         ctx->bs = bs;
    5987             : 
    5988           8 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5989             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5990           8 :         if (!ctx->super) {
    5991           0 :                 free(ctx);
    5992           0 :                 cb_fn(cb_arg, -ENOMEM);
    5993           0 :                 return;
    5994             :         }
    5995             : 
    5996           8 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5997           8 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5998           8 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5999             : 
    6000           8 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6001           8 :         if (!seq) {
    6002           0 :                 spdk_free(ctx->super);
    6003           0 :                 free(ctx);
    6004           0 :                 cb_fn(cb_arg, -ENOMEM);
    6005           0 :                 return;
    6006             :         }
    6007             : 
    6008           8 :         bs->super_blob = blobid;
    6009             : 
    6010             :         /* Read super block */
    6011           8 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    6012           8 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    6013             :                              bs_set_super_read_cpl, ctx);
    6014             : }
    6015             : 
    6016             : /* END spdk_bs_set_super */
    6017             : 
    6018             : void
    6019          12 : spdk_bs_get_super(struct spdk_blob_store *bs,
    6020             :                   spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6021             : {
    6022          12 :         if (bs->super_blob == SPDK_BLOBID_INVALID) {
    6023           4 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
    6024             :         } else {
    6025           8 :                 cb_fn(cb_arg, bs->super_blob, 0);
    6026             :         }
    6027          12 : }
    6028             : 
    6029             : uint64_t
    6030         132 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
    6031             : {
    6032         132 :         return bs->cluster_sz;
    6033             : }
    6034             : 
    6035             : uint64_t
    6036          68 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
    6037             : {
    6038          68 :         return SPDK_BS_PAGE_SIZE;
    6039             : }
    6040             : 
    6041             : uint64_t
    6042         734 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
    6043             : {
    6044         734 :         return bs->io_unit_size;
    6045             : }
    6046             : 
    6047             : uint64_t
    6048         540 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
    6049             : {
    6050         540 :         return bs->num_free_clusters;
    6051             : }
    6052             : 
    6053             : uint64_t
    6054          92 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
    6055             : {
    6056          92 :         return bs->total_data_clusters;
    6057             : }
    6058             : 
    6059             : static int
    6060         780 : bs_register_md_thread(struct spdk_blob_store *bs)
    6061             : {
    6062         780 :         bs->md_channel = spdk_get_io_channel(bs);
    6063         780 :         if (!bs->md_channel) {
    6064           0 :                 SPDK_ERRLOG("Failed to get IO channel.\n");
    6065           0 :                 return -1;
    6066             :         }
    6067             : 
    6068         780 :         return 0;
    6069             : }
    6070             : 
    6071             : static int
    6072         780 : bs_unregister_md_thread(struct spdk_blob_store *bs)
    6073             : {
    6074         780 :         spdk_put_io_channel(bs->md_channel);
    6075             : 
    6076         780 :         return 0;
    6077             : }
    6078             : 
    6079             : spdk_blob_id
    6080         562 : spdk_blob_get_id(struct spdk_blob *blob)
    6081             : {
    6082         562 :         assert(blob != NULL);
    6083             : 
    6084         562 :         return blob->id;
    6085             : }
    6086             : 
    6087             : uint64_t
    6088          24 : spdk_blob_get_num_pages(struct spdk_blob *blob)
    6089             : {
    6090          24 :         assert(blob != NULL);
    6091             : 
    6092          24 :         return bs_cluster_to_page(blob->bs, blob->active.num_clusters);
    6093             : }
    6094             : 
    6095             : uint64_t
    6096          24 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
    6097             : {
    6098          24 :         assert(blob != NULL);
    6099             : 
    6100          24 :         return spdk_blob_get_num_pages(blob) * bs_io_unit_per_page(blob->bs);
    6101             : }
    6102             : 
    6103             : uint64_t
    6104         565 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
    6105             : {
    6106         565 :         assert(blob != NULL);
    6107             : 
    6108         565 :         return blob->active.num_clusters;
    6109             : }
    6110             : 
    6111             : uint64_t
    6112         330 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
    6113             : {
    6114         330 :         assert(blob != NULL);
    6115             : 
    6116         330 :         return blob->active.num_allocated_clusters;
    6117             : }
    6118             : 
    /*
     * Search forward from offset for the first I/O unit whose allocation state
     * equals is_allocated.
     *
     * The probe starts at offset itself. After each miss it advances by
     * bs_num_io_units_to_cluster_boundary(), so non-matching clusters are skipped
     * in one step.
     *
     * Returns the matching I/O unit index, or UINT64_MAX when the end of the blob
     * is reached without a match (including when offset is already past the end).
     */
    static uint64_t
    blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
    {
            const uint64_t io_unit_count = spdk_blob_get_num_io_units(blob);
            uint64_t pos;

            for (pos = offset; pos < io_unit_count;
                 pos += bs_num_io_units_to_cluster_boundary(blob, pos)) {
                    if (bs_io_unit_is_allocated(blob, pos) == is_allocated) {
                            return pos;
                    }
            }

            return UINT64_MAX;
    }
    6134             : 
    /*
     * Return the first allocated I/O unit at or after offset, or UINT64_MAX if
     * blob_find_io_unit() finds none.
     */
    uint64_t
    spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    {
            const bool want_allocated = true;

            return blob_find_io_unit(blob, offset, want_allocated);
    }
    6140             : 
    /*
     * Return the first unallocated I/O unit at or after offset, or UINT64_MAX if
     * blob_find_io_unit() finds none.
     */
    uint64_t
    spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    {
            const bool want_allocated = false;

            return blob_find_io_unit(blob, offset, want_allocated);
    }
    6146             : 
    6147             : /* START spdk_bs_create_blob */
    6148             : 
    6149             : static void
    6150        1874 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    6151             : {
    6152        1874 :         struct spdk_blob *blob = cb_arg;
    6153        1874 :         uint32_t page_idx = bs_blobid_to_page(blob->id);
    6154             : 
    6155        1874 :         if (bserrno != 0) {
    6156           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    6157           0 :                 spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
    6158           0 :                 bs_release_md_page(blob->bs, page_idx);
    6159           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    6160             :         }
    6161             : 
    6162        1874 :         blob_free(blob);
    6163             : 
    6164        1874 :         bs_sequence_finish(seq, bserrno);
    6165        1874 : }
    6166             : 
    6167             : static int
    6168        3768 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
    6169             :                 bool internal)
    6170             : {
    6171             :         uint64_t i;
    6172        3768 :         size_t value_len = 0;
    6173             :         int rc;
    6174        3768 :         const void *value = NULL;
    6175        3768 :         if (xattrs->count > 0 && xattrs->get_value == NULL) {
    6176           8 :                 return -EINVAL;
    6177             :         }
    6178        4072 :         for (i = 0; i < xattrs->count; i++) {
    6179         316 :                 xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
    6180         316 :                 if (value == NULL || value_len == 0) {
    6181           4 :                         return -EINVAL;
    6182             :                 }
    6183         312 :                 rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
    6184         312 :                 if (rc < 0) {
    6185           0 :                         return rc;
    6186             :                 }
    6187             :         }
    6188        3756 :         return 0;
    6189             : }
    6190             : 
    6191             : static void
    6192        1858 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
    6193             : {
    6194             : #define FIELD_OK(field) \
    6195             :         offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
    6196             : 
    6197             : #define SET_FIELD(field) \
    6198             :         if (FIELD_OK(field)) { \
    6199             :                 dst->field = src->field; \
    6200             :         } \
    6201             : 
    6202        1858 :         SET_FIELD(num_clusters);
    6203        1858 :         SET_FIELD(thin_provision);
    6204        1858 :         SET_FIELD(clear_method);
    6205             : 
    6206        1858 :         if (FIELD_OK(xattrs)) {
    6207        1858 :                 memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
    6208             :         }
    6209             : 
    6210        1858 :         SET_FIELD(use_extent_table);
    6211        1858 :         SET_FIELD(esnap_id);
    6212        1858 :         SET_FIELD(esnap_id_len);
    6213             : 
    6214        1858 :         dst->opts_size = src->opts_size;
    6215             : 
    6216             :         /* You should not remove this statement, but need to update the assert statement
    6217             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    6218             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
    6219             : 
    6220             : #undef FIELD_OK
    6221             : #undef SET_FIELD
    6222        1858 : }
    6223             : 
    6224             : static void
    6225        1890 : bs_create_blob(struct spdk_blob_store *bs,
    6226             :                const struct spdk_blob_opts *opts,
    6227             :                const struct spdk_blob_xattr_opts *internal_xattrs,
    6228             :                spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6229             : {
    6230             :         struct spdk_blob        *blob;
    6231             :         uint32_t                page_idx;
    6232        1890 :         struct spdk_bs_cpl      cpl;
    6233        1890 :         struct spdk_blob_opts   opts_local;
    6234        1890 :         struct spdk_blob_xattr_opts internal_xattrs_default;
    6235             :         spdk_bs_sequence_t      *seq;
    6236             :         spdk_blob_id            id;
    6237             :         int rc;
    6238             : 
    6239        1890 :         assert(spdk_get_thread() == bs->md_thread);
    6240             : 
    6241        1890 :         spdk_spin_lock(&bs->used_lock);
    6242        1890 :         page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
    6243        1890 :         if (page_idx == UINT32_MAX) {
    6244           0 :                 spdk_spin_unlock(&bs->used_lock);
    6245           0 :                 cb_fn(cb_arg, 0, -ENOMEM);
    6246           0 :                 return;
    6247             :         }
    6248        1890 :         spdk_bit_array_set(bs->used_blobids, page_idx);
    6249        1890 :         bs_claim_md_page(bs, page_idx);
    6250        1890 :         spdk_spin_unlock(&bs->used_lock);
    6251             : 
    6252        1890 :         id = bs_page_to_blobid(page_idx);
    6253             : 
    6254        1890 :         SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
    6255             : 
    6256        1890 :         spdk_blob_opts_init(&opts_local, sizeof(opts_local));
    6257        1890 :         if (opts) {
    6258        1858 :                 blob_opts_copy(opts, &opts_local);
    6259             :         }
    6260             : 
    6261        1890 :         blob = blob_alloc(bs, id);
    6262        1890 :         if (!blob) {
    6263           0 :                 rc = -ENOMEM;
    6264           0 :                 goto error;
    6265             :         }
    6266             : 
    6267        1890 :         blob->use_extent_table = opts_local.use_extent_table;
    6268        1890 :         if (blob->use_extent_table) {
    6269         966 :                 blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
    6270             :         }
    6271             : 
    6272        1890 :         if (!internal_xattrs) {
    6273        1622 :                 blob_xattrs_init(&internal_xattrs_default);
    6274        1622 :                 internal_xattrs = &internal_xattrs_default;
    6275             :         }
    6276             : 
    6277        1890 :         rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
    6278        1890 :         if (rc < 0) {
    6279          12 :                 goto error;
    6280             :         }
    6281             : 
    6282        1878 :         rc = blob_set_xattrs(blob, internal_xattrs, true);
    6283        1878 :         if (rc < 0) {
    6284           0 :                 goto error;
    6285             :         }
    6286             : 
    6287        1878 :         if (opts_local.thin_provision) {
    6288         352 :                 blob_set_thin_provision(blob);
    6289             :         }
    6290             : 
    6291        1878 :         blob_set_clear_method(blob, opts_local.clear_method);
    6292             : 
    6293        1878 :         if (opts_local.esnap_id != NULL) {
    6294          60 :                 if (opts_local.esnap_id_len > UINT16_MAX) {
    6295           0 :                         SPDK_ERRLOG("esnap id length %" PRIu64 "is too long\n",
    6296             :                                     opts_local.esnap_id_len);
    6297           0 :                         rc = -EINVAL;
    6298           0 :                         goto error;
    6299             : 
    6300             :                 }
    6301          60 :                 blob_set_thin_provision(blob);
    6302          60 :                 blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6303          60 :                 rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
    6304          60 :                                     opts_local.esnap_id, opts_local.esnap_id_len, true);
    6305          60 :                 if (rc != 0) {
    6306           0 :                         goto error;
    6307             :                 }
    6308             :         }
    6309             : 
    6310        1878 :         rc = blob_resize(blob, opts_local.num_clusters);
    6311        1878 :         if (rc < 0) {
    6312           4 :                 goto error;
    6313             :         }
    6314        1874 :         cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6315        1874 :         cpl.u.blobid.cb_fn = cb_fn;
    6316        1874 :         cpl.u.blobid.cb_arg = cb_arg;
    6317        1874 :         cpl.u.blobid.blobid = blob->id;
    6318             : 
    6319        1874 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6320        1874 :         if (!seq) {
    6321           0 :                 rc = -ENOMEM;
    6322           0 :                 goto error;
    6323             :         }
    6324             : 
    6325        1874 :         blob_persist(seq, blob, bs_create_blob_cpl, blob);
    6326        1874 :         return;
    6327             : 
    6328          16 : error:
    6329          16 :         SPDK_ERRLOG("Failed to create blob: %s, size in clusters/size: %lu (clusters)\n",
    6330             :                     spdk_strerror(rc), opts_local.num_clusters);
    6331          16 :         if (blob != NULL) {
    6332          16 :                 blob_free(blob);
    6333             :         }
    6334          16 :         spdk_spin_lock(&bs->used_lock);
    6335          16 :         spdk_bit_array_clear(bs->used_blobids, page_idx);
    6336          16 :         bs_release_md_page(bs, page_idx);
    6337          16 :         spdk_spin_unlock(&bs->used_lock);
    6338          16 :         cb_fn(cb_arg, 0, rc);
    6339             : }
    6340             : 
    6341             : void
    6342          16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
    6343             :                     spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6344             : {
    6345          16 :         bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
    6346          16 : }
    6347             : 
    6348             : void
    6349        1598 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
    6350             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6351             : {
    6352        1598 :         bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
    6353        1598 : }
    6354             : 
    6355             : /* END spdk_bs_create_blob */
    6356             : 
    6357             : /* START blob_cleanup */
    6358             : 
    6359             : struct spdk_clone_snapshot_ctx {
    6360             :         struct spdk_bs_cpl      cpl;
    6361             :         int bserrno;
    6362             :         bool frozen;
    6363             : 
    6364             :         struct spdk_io_channel *channel;
    6365             : 
    6366             :         /* Current cluster for inflate operation */
    6367             :         uint64_t cluster;
    6368             : 
    6369             :         /* For inflation force allocation of all unallocated clusters and remove
    6370             :          * thin-provisioning. Otherwise only decouple parent and keep clone thin. */
    6371             :         bool allocate_all;
    6372             : 
    6373             :         struct {
    6374             :                 spdk_blob_id id;
    6375             :                 struct spdk_blob *blob;
    6376             :                 bool md_ro;
    6377             :         } original;
    6378             :         struct {
    6379             :                 spdk_blob_id id;
    6380             :                 struct spdk_blob *blob;
    6381             :         } new;
    6382             : 
    6383             :         /* xattrs specified for snapshot/clones only. They have no impact on
    6384             :          * the original blobs xattrs. */
    6385             :         const struct spdk_blob_xattr_opts *xattrs;
    6386             : };
    6387             : 
    6388             : static void
    6389         338 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
    6390             : {
    6391         338 :         struct spdk_clone_snapshot_ctx *ctx = cb_arg;
    6392         338 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    6393             : 
    6394         338 :         if (bserrno != 0) {
    6395           6 :                 if (ctx->bserrno != 0) {
    6396           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6397             :                 } else {
    6398           6 :                         ctx->bserrno = bserrno;
    6399             :                 }
    6400             :         }
    6401             : 
    6402         338 :         switch (cpl->type) {
    6403         278 :         case SPDK_BS_CPL_TYPE_BLOBID:
    6404         278 :                 cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
    6405         278 :                 break;
    6406          60 :         case SPDK_BS_CPL_TYPE_BLOB_BASIC:
    6407          60 :                 cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    6408          60 :                 break;
    6409           0 :         default:
    6410           0 :                 SPDK_UNREACHABLE();
    6411             :                 break;
    6412             :         }
    6413             : 
    6414         338 :         free(ctx);
    6415         338 : }
    6416             : 
    6417             : static void
    6418         324 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    6419             : {
    6420         324 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6421         324 :         struct spdk_blob *origblob = ctx->original.blob;
    6422             : 
    6423         324 :         if (bserrno != 0) {
    6424           0 :                 if (ctx->bserrno != 0) {
    6425           0 :                         SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
    6426             :                 } else {
    6427           0 :                         ctx->bserrno = bserrno;
    6428             :                 }
    6429             :         }
    6430             : 
    6431         324 :         ctx->original.id = origblob->id;
    6432         324 :         origblob->locked_operation_in_progress = false;
    6433             : 
    6434             :         /* Revert md_ro to original state */
    6435         324 :         origblob->md_ro = ctx->original.md_ro;
    6436             : 
    6437         324 :         spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
    6438         324 : }
    6439             : 
    6440             : static void
    6441         324 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
    6442             : {
    6443         324 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6444         324 :         struct spdk_blob *origblob = ctx->original.blob;
    6445             : 
    6446         324 :         if (bserrno != 0) {
    6447          24 :                 if (ctx->bserrno != 0) {
    6448           4 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6449             :                 } else {
    6450          20 :                         ctx->bserrno = bserrno;
    6451             :                 }
    6452             :         }
    6453             : 
    6454         324 :         if (ctx->frozen) {
    6455             :                 /* Unfreeze any outstanding I/O */
    6456         208 :                 blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
    6457             :         } else {
    6458         116 :                 bs_snapshot_unfreeze_cpl(ctx, 0);
    6459             :         }
    6460             : 
    6461         324 : }
    6462             : 
    6463             : static void
    6464           4 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
    6465             : {
    6466           4 :         struct spdk_blob *newblob = ctx->new.blob;
    6467             : 
    6468           4 :         if (bserrno != 0) {
    6469           4 :                 if (ctx->bserrno != 0) {
    6470           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6471             :                 } else {
    6472           4 :                         ctx->bserrno = bserrno;
    6473             :                 }
    6474             :         }
    6475             : 
    6476           4 :         ctx->new.id = newblob->id;
    6477           4 :         spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6478           4 : }
    6479             : 
    6480             : /* END blob_cleanup */
    6481             : 
    6482             : /* START spdk_bs_create_snapshot */
    6483             : 
    6484             : static void
    6485         216 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
    6486             : {
    6487             :         uint64_t *cluster_temp;
    6488             :         uint64_t num_allocated_clusters_temp;
    6489             :         uint32_t *extent_page_temp;
    6490             : 
    6491         216 :         cluster_temp = blob1->active.clusters;
    6492         216 :         blob1->active.clusters = blob2->active.clusters;
    6493         216 :         blob2->active.clusters = cluster_temp;
    6494             : 
    6495         216 :         num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
    6496         216 :         blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
    6497         216 :         blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
    6498             : 
    6499         216 :         extent_page_temp = blob1->active.extent_pages;
    6500         216 :         blob1->active.extent_pages = blob2->active.extent_pages;
    6501         216 :         blob2->active.extent_pages = extent_page_temp;
    6502         216 : }
    6503             : 
    6504             : /* Copies an internal xattr */
    6505             : static int
    6506          20 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
    6507             : {
    6508          20 :         const void      *val = NULL;
    6509          20 :         size_t          len;
    6510             :         int             bserrno;
    6511             : 
    6512          20 :         bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
    6513          20 :         if (bserrno != 0) {
    6514           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
    6515           0 :                 return bserrno;
    6516             :         }
    6517             : 
    6518          20 :         bserrno = blob_set_xattr(toblob, name, val, len, true);
    6519          20 :         if (bserrno != 0) {
    6520           0 :                 SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
    6521             :                             name, toblob->id);
    6522           0 :                 return bserrno;
    6523             :         }
    6524          20 :         return 0;
    6525             : }
    6526             : 
    6527             : static void
    6528         204 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
    6529             : {
    6530         204 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6531         204 :         struct spdk_blob *origblob = ctx->original.blob;
    6532         204 :         struct spdk_blob *newblob = ctx->new.blob;
    6533             : 
    6534         204 :         if (bserrno != 0) {
    6535           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6536           4 :                 if (blob_is_esnap_clone(newblob)) {
    6537           0 :                         bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6538           0 :                         origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6539             :                 }
    6540           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6541           4 :                 return;
    6542             :         }
    6543             : 
    6544             :         /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
    6545         200 :         bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
    6546         200 :         if (bserrno != 0) {
    6547           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6548           0 :                 return;
    6549             :         }
    6550             : 
    6551         200 :         bs_blob_list_add(ctx->original.blob);
    6552             : 
    6553         200 :         spdk_blob_set_read_only(newblob);
    6554             : 
    6555             :         /* sync snapshot metadata */
    6556         200 :         spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6557             : }
    6558             : 
    6559             : static void
    6560         208 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
    6561             : {
    6562         208 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6563         208 :         struct spdk_blob *origblob = ctx->original.blob;
    6564         208 :         struct spdk_blob *newblob = ctx->new.blob;
    6565             : 
    6566         208 :         if (bserrno != 0) {
    6567             :                 /* return cluster map back to original */
    6568           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6569             : 
    6570             :                 /* Newblob md sync failed. Valid clusters are only present in origblob.
    6571             :                  * Since I/O is frozen on origblob, not changes to zeroed out cluster map should have occurred.
    6572             :                  * Newblob needs to be reverted to thin_provisioned state at creation to properly close. */
    6573           4 :                 blob_set_thin_provision(newblob);
    6574           4 :                 assert(spdk_mem_all_zero(newblob->active.clusters,
    6575             :                                          newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6576           4 :                 assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6577             :                                          newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6578             : 
    6579           4 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6580           4 :                 return;
    6581             :         }
    6582             : 
    6583             :         /* Set internal xattr for snapshot id */
    6584         204 :         bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
    6585         204 :         if (bserrno != 0) {
    6586             :                 /* return cluster map back to original */
    6587           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6588           0 :                 blob_set_thin_provision(newblob);
    6589           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6590           0 :                 return;
    6591             :         }
    6592             : 
    6593             :         /* Create new back_bs_dev for snapshot */
    6594         204 :         origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
    6595         204 :         if (origblob->back_bs_dev == NULL) {
    6596             :                 /* return cluster map back to original */
    6597           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6598           0 :                 blob_set_thin_provision(newblob);
    6599           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
    6600           0 :                 return;
    6601             :         }
    6602             : 
    6603             :         /* Remove the xattr that references an external snapshot */
    6604         204 :         if (blob_is_esnap_clone(origblob)) {
    6605          12 :                 origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6606          12 :                 bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6607          12 :                 if (bserrno != 0) {
    6608           0 :                         if (bserrno == -ENOENT) {
    6609           0 :                                 SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
    6610             :                                             " xattr to remove\n", origblob->id);
    6611           0 :                                 assert(false);
    6612             :                         } else {
    6613             :                                 /* return cluster map back to original */
    6614           0 :                                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6615           0 :                                 blob_set_thin_provision(newblob);
    6616           0 :                                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6617           0 :                                 return;
    6618             :                         }
    6619             :                 }
    6620             :         }
    6621             : 
    6622         204 :         bs_blob_list_remove(origblob);
    6623         204 :         origblob->parent_id = newblob->id;
    6624             :         /* set clone blob as thin provisioned */
    6625         204 :         blob_set_thin_provision(origblob);
    6626             : 
    6627         204 :         bs_blob_list_add(newblob);
    6628             : 
    6629             :         /* sync clone metadata */
    6630         204 :         spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
    6631             : }
    6632             : 
    6633             : static void
    6634         208 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
    6635             : {
    6636         208 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6637         208 :         struct spdk_blob *origblob = ctx->original.blob;
    6638         208 :         struct spdk_blob *newblob = ctx->new.blob;
    6639             :         int bserrno;
    6640             : 
    6641         208 :         if (rc != 0) {
    6642           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, rc);
    6643           0 :                 return;
    6644             :         }
    6645             : 
    6646         208 :         ctx->frozen = true;
    6647             : 
    6648         208 :         if (blob_is_esnap_clone(origblob)) {
    6649             :                 /* Clean up any channels associated with the original blob id because future IO will
    6650             :                  * perform IO using the snapshot blob_id.
    6651             :                  */
    6652          12 :                 blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
    6653             :         }
    6654         208 :         if (newblob->back_bs_dev) {
    6655         208 :                 blob_back_bs_destroy(newblob);
    6656             :         }
    6657             :         /* set new back_bs_dev for snapshot */
    6658         208 :         newblob->back_bs_dev = origblob->back_bs_dev;
    6659             :         /* Set invalid flags from origblob */
    6660         208 :         newblob->invalid_flags = origblob->invalid_flags;
    6661             : 
    6662             :         /* inherit parent from original blob if set */
    6663         208 :         newblob->parent_id = origblob->parent_id;
    6664         208 :         switch (origblob->parent_id) {
    6665          12 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    6666          12 :                 bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6667          12 :                 if (bserrno != 0) {
    6668           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6669           0 :                         return;
    6670             :                 }
    6671          12 :                 break;
    6672         144 :         case SPDK_BLOBID_INVALID:
    6673         144 :                 break;
    6674          52 :         default:
    6675             :                 /* Set internal xattr for snapshot id */
    6676          52 :                 bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
    6677          52 :                                          &origblob->parent_id, sizeof(spdk_blob_id), true);
    6678          52 :                 if (bserrno != 0) {
    6679           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6680           0 :                         return;
    6681             :                 }
    6682             :         }
    6683             : 
    6684             :         /* swap cluster maps */
    6685         208 :         bs_snapshot_swap_cluster_maps(newblob, origblob);
    6686             : 
    6687             :         /* Set the clear method on the new blob to match the original. */
    6688         208 :         blob_set_clear_method(newblob, origblob->clear_method);
    6689             : 
    6690             :         /* sync snapshot metadata */
    6691         208 :         spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
    6692             : }
    6693             : 
    6694             : static void
    6695         212 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6696             : {
    6697         212 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6698         212 :         struct spdk_blob *origblob = ctx->original.blob;
    6699         212 :         struct spdk_blob *newblob = _blob;
    6700             : 
    6701         212 :         if (bserrno != 0) {
    6702           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6703           4 :                 return;
    6704             :         }
    6705             : 
    6706         208 :         ctx->new.blob = newblob;
    6707         208 :         assert(spdk_blob_is_thin_provisioned(newblob));
    6708         208 :         assert(spdk_mem_all_zero(newblob->active.clusters,
    6709             :                                  newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6710         208 :         assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6711             :                                  newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6712             : 
    6713         208 :         blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
    6714             : }
    6715             : 
    6716             : static void
    6717         216 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6718             : {
    6719         216 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6720         216 :         struct spdk_blob *origblob = ctx->original.blob;
    6721             : 
    6722         216 :         if (bserrno != 0) {
    6723           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6724           4 :                 return;
    6725             :         }
    6726             : 
    6727         212 :         ctx->new.id = blobid;
    6728         212 :         ctx->cpl.u.blobid.blobid = blobid;
    6729             : 
    6730         212 :         spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
    6731             : }
    6732             : 
    6733             : 
    6734             : static void
    6735         216 : bs_xattr_snapshot(void *arg, const char *name,
    6736             :                   const void **value, size_t *value_len)
    6737             : {
    6738         216 :         assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
    6739             : 
    6740         216 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6741         216 :         *value = &blob->id;
    6742         216 :         *value_len = sizeof(blob->id);
    6743         216 : }
    6744             : 
    6745             : static void
    6746         226 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6747             : {
    6748         226 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6749         226 :         struct spdk_blob_opts opts;
    6750         226 :         struct spdk_blob_xattr_opts internal_xattrs;
    6751         226 :         char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
    6752             : 
    6753         226 :         if (bserrno != 0) {
    6754           6 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6755           6 :                 return;
    6756             :         }
    6757             : 
    6758         220 :         ctx->original.blob = _blob;
    6759             : 
    6760         220 :         if (_blob->data_ro || _blob->md_ro) {
    6761           4 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
    6762             :                               PRIx64 "\n", _blob->id);
    6763           4 :                 ctx->bserrno = -EINVAL;
    6764           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6765           4 :                 return;
    6766             :         }
    6767             : 
    6768         216 :         if (_blob->locked_operation_in_progress) {
    6769           0 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
    6770           0 :                 ctx->bserrno = -EBUSY;
    6771           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6772           0 :                 return;
    6773             :         }
    6774             : 
    6775         216 :         _blob->locked_operation_in_progress = true;
    6776             : 
    6777         216 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6778         216 :         blob_xattrs_init(&internal_xattrs);
    6779             : 
    6780             :         /* Change the size of new blob to the same as in original blob,
    6781             :          * but do not allocate clusters */
    6782         216 :         opts.thin_provision = true;
    6783         216 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6784         216 :         opts.use_extent_table = _blob->use_extent_table;
    6785             : 
    6786             :         /* If there are any xattrs specified for snapshot, set them now */
    6787         216 :         if (ctx->xattrs) {
    6788           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6789             :         }
    6790             :         /* Set internal xattr SNAPSHOT_IN_PROGRESS */
    6791         216 :         internal_xattrs.count = 1;
    6792         216 :         internal_xattrs.ctx = _blob;
    6793         216 :         internal_xattrs.names = xattrs_names;
    6794         216 :         internal_xattrs.get_value = bs_xattr_snapshot;
    6795             : 
    6796         216 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6797             :                        bs_snapshot_newblob_create_cpl, ctx);
    6798             : }
    6799             : 
    6800             : void
    6801         226 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6802             :                         const struct spdk_blob_xattr_opts *snapshot_xattrs,
    6803             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6804             : {
    6805         226 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    6806             : 
    6807         226 :         if (!ctx) {
    6808           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6809           0 :                 return;
    6810             :         }
    6811         226 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6812         226 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6813         226 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6814         226 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6815         226 :         ctx->bserrno = 0;
    6816         226 :         ctx->frozen = false;
    6817         226 :         ctx->original.id = blobid;
    6818         226 :         ctx->xattrs = snapshot_xattrs;
    6819             : 
    6820         226 :         spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
    6821             : }
    6822             : /* END spdk_bs_create_snapshot */
    6823             : 
    6824             : /* START spdk_bs_create_clone */
    6825             : 
    6826             : static void
    6827          48 : bs_xattr_clone(void *arg, const char *name,
    6828             :                const void **value, size_t *value_len)
    6829             : {
    6830          48 :         assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
    6831             : 
    6832          48 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6833          48 :         *value = &blob->id;
    6834          48 :         *value_len = sizeof(blob->id);
    6835          48 : }
    6836             : 
    6837             : static void
    6838          48 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6839             : {
    6840          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6841          48 :         struct spdk_blob *clone = _blob;
    6842             : 
    6843          48 :         ctx->new.blob = clone;
    6844          48 :         bs_blob_list_add(clone);
    6845             : 
    6846          48 :         spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
    6847          48 : }
    6848             : 
    6849             : static void
    6850          48 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6851             : {
    6852          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6853             : 
    6854          48 :         ctx->cpl.u.blobid.blobid = blobid;
    6855          48 :         spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
    6856          48 : }
    6857             : 
    6858             : static void
    6859          52 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6860             : {
    6861          52 :         struct spdk_clone_snapshot_ctx  *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6862          52 :         struct spdk_blob_opts           opts;
    6863          52 :         struct spdk_blob_xattr_opts internal_xattrs;
    6864          52 :         char *xattr_names[] = { BLOB_SNAPSHOT };
    6865             : 
    6866          52 :         if (bserrno != 0) {
    6867           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6868           0 :                 return;
    6869             :         }
    6870             : 
    6871          52 :         ctx->original.blob = _blob;
    6872          52 :         ctx->original.md_ro = _blob->md_ro;
    6873             : 
    6874          52 :         if (!_blob->data_ro || !_blob->md_ro) {
    6875           4 :                 SPDK_DEBUGLOG(blob, "Clone not from read-only blob\n");
    6876           4 :                 ctx->bserrno = -EINVAL;
    6877           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6878           4 :                 return;
    6879             :         }
    6880             : 
    6881          48 :         if (_blob->locked_operation_in_progress) {
    6882           0 :                 SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
    6883           0 :                 ctx->bserrno = -EBUSY;
    6884           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6885           0 :                 return;
    6886             :         }
    6887             : 
    6888          48 :         _blob->locked_operation_in_progress = true;
    6889             : 
    6890          48 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6891          48 :         blob_xattrs_init(&internal_xattrs);
    6892             : 
    6893          48 :         opts.thin_provision = true;
    6894          48 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6895          48 :         opts.use_extent_table = _blob->use_extent_table;
    6896          48 :         if (ctx->xattrs) {
    6897           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6898             :         }
    6899             : 
    6900             :         /* Set internal xattr BLOB_SNAPSHOT */
    6901          48 :         internal_xattrs.count = 1;
    6902          48 :         internal_xattrs.ctx = _blob;
    6903          48 :         internal_xattrs.names = xattr_names;
    6904          48 :         internal_xattrs.get_value = bs_xattr_clone;
    6905             : 
    6906          48 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6907             :                        bs_clone_newblob_create_cpl, ctx);
    6908             : }
    6909             : 
    6910             : void
    6911          52 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6912             :                      const struct spdk_blob_xattr_opts *clone_xattrs,
    6913             :                      spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6914             : {
    6915          52 :         struct spdk_clone_snapshot_ctx  *ctx = calloc(1, sizeof(*ctx));
    6916             : 
    6917          52 :         if (!ctx) {
    6918           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6919           0 :                 return;
    6920             :         }
    6921             : 
    6922          52 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6923          52 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6924          52 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6925          52 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6926          52 :         ctx->bserrno = 0;
    6927          52 :         ctx->xattrs = clone_xattrs;
    6928          52 :         ctx->original.id = blobid;
    6929             : 
    6930          52 :         spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
    6931             : }
    6932             : 
    6933             : /* END spdk_bs_create_clone */
    6934             : 
    6935             : /* START spdk_bs_inflate_blob */
    6936             : 
    6937             : static void
    6938          12 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
    6939             : {
    6940          12 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6941          12 :         struct spdk_blob *_blob = ctx->original.blob;
    6942             : 
    6943          12 :         if (bserrno != 0) {
    6944           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6945           0 :                 return;
    6946             :         }
    6947             : 
    6948             :         /* Temporarily override md_ro flag for MD modification */
    6949          12 :         _blob->md_ro = false;
    6950             : 
    6951          12 :         bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
    6952          12 :         if (bserrno != 0) {
    6953           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6954           0 :                 return;
    6955             :         }
    6956             : 
    6957          12 :         assert(_parent != NULL);
    6958             : 
    6959          12 :         bs_blob_list_remove(_blob);
    6960          12 :         _blob->parent_id = _parent->id;
    6961             : 
    6962          12 :         blob_back_bs_destroy(_blob);
    6963          12 :         _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
    6964          12 :         bs_blob_list_add(_blob);
    6965             : 
    6966          12 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    6967             : }
    6968             : 
    6969             : static void
    6970          56 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
    6971             : {
                               /*
                                * Final step of an inflate / decouple-parent operation; reached from
                                * bs_inflate_blob_touch_next() once no cluster is left to allocate.
                                *
                                * - allocate_all: the blob is detached from its parent entirely.
                                * - otherwise: the blob is re-parented to its parent's parent (the
                                *   work continues in bs_inflate_blob_set_parent_cpl), or, when the
                                *   parent has no parent, it becomes parentless.
                                *
                                * On the paths that do not return early the metadata is marked dirty
                                * and synced, with bs_clone_snapshot_origblob_cleanup as completion.
                                */
    6972          56 :         struct spdk_blob *_blob = ctx->original.blob;
    6973             :         struct spdk_blob *_parent;
    6974             : 
    6975          56 :         if (ctx->allocate_all) {
    6976             :                 /* remove thin provisioning */
    6977          32 :                 bs_blob_list_remove(_blob);
    6978          32 :                 if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    6979           8 :                         blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6980           8 :                         _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6981             :                 } else {
    6982          24 :                         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    6983             :                 }
                                       /*
                                        * NOTE(review): the xattr removals above run before md_ro is
                                        * cleared further down - confirm blob_remove_xattr() does not
                                        * depend on md_ro.
                                        */
    6984          32 :                 _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
    6985          32 :                 blob_back_bs_destroy(_blob);
    6986          32 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    6987             :         } else {
    6988             :                 /* For now, esnap clones always have allocate_all set. */
    6989          24 :                 assert(!blob_is_esnap_clone(_blob));
    6990             : 
                                       /* Not an esnap clone, so back_bs_dev is treated as a blob-backed dev. */
    6991          24 :                 _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    6992          24 :                 if (_parent->parent_id != SPDK_BLOBID_INVALID) {
    6993             :                         /* We must change the parent of the inflated blob */
    6994          12 :                         spdk_bs_open_blob(_blob->bs, _parent->parent_id,
    6995             :                                           bs_inflate_blob_set_parent_cpl, ctx);
    6996          12 :                         return;
    6997             :                 }
    6998             : 
                                       /*
                                        * The parent has no parent of its own: drop the relation and
                                        * replace the back device with the one returned by
                                        * bs_create_zeroes_dev().
                                        */
    6999          12 :                 bs_blob_list_remove(_blob);
    7000          12 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    7001          12 :                 blob_back_bs_destroy(_blob);
    7002          12 :                 _blob->back_bs_dev = bs_create_zeroes_dev();
    7003             :         }
    7004             : 
    7005             :         /* Temporarily override md_ro flag for MD modification */
    7006          44 :         _blob->md_ro = false;
                               /*
                                * The return value is ignored: on the allocate_all path the snapshot
                                * xattr was already removed above (or the blob was an esnap clone
                                * whose BLOB_EXTERNAL_SNAPSHOT_ID xattr was removed instead).
                                */
    7007          44 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7008          44 :         _blob->state = SPDK_BLOB_STATE_DIRTY;
    7009             : 
    7010          44 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7011             : }
    7012             : 
    7013             : /* Check if `cluster` of `blob` must be allocated by an inflate (allocate_all) or decouple pass */
    7014             : static inline bool
    7015        1200 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
    7016             : {
                               /*
                                * `cluster` is used directly as an index into active.clusters[];
                                * callers in this file keep it below active.num_clusters.
                                */
    7017             :         struct spdk_blob_bs_dev *b;
    7018             : 
    7019        1200 :         assert(blob != NULL);
    7020             : 
    7021        1200 :         if (blob->active.clusters[cluster] != 0) {
    7022             :                 /* Cluster is already allocated */
    7023          32 :                 return false;
    7024             :         }
    7025             : 
    7026        1168 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    7027             :                 /* Blob has no parent blob: allocate only when everything is requested */
    7028          80 :                 return allocate_all;
    7029             :         }
    7030             : 
    7031        1088 :         if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
                                       /* Every unallocated cluster of an esnap clone is reported as needed */
    7032          64 :                 return true;
    7033             :         }
    7034             : 
                               /*
                                * Blob-backed parent: allocate when inflating everything, or when
                                * the parent itself has this cluster allocated.
                                */
    7035        1024 :         b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
    7036        1024 :         return (allocate_all || b->blob->active.clusters[cluster] != 0);
    7037             : }
    7038             : 
    7039             : static void
    7040         508 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
    7041             : {
                               /*
                                * One step of the per-cluster inflate loop. The function registers
                                * itself as the completion of each cluster copy, so it is re-entered
                                * with the result of the previous step in `bserrno`.
                                *
                                * cb_arg  - struct spdk_clone_snapshot_ctx of the operation
                                * bserrno - result of the previous step; non-zero aborts the loop
                                *           through bs_clone_snapshot_origblob_cleanup()
                                */
    7042         508 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7043         508 :         struct spdk_blob *_blob = ctx->original.blob;
    7044         508 :         struct spdk_bs_cpl cpl;
    7045             :         spdk_bs_user_op_t *op;
    7046             :         uint64_t offset;
    7047             : 
    7048         508 :         if (bserrno != 0) {
    7049           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    7050           0 :                 return;
    7051             :         }
    7052             : 
                               /* Skip forward to the next cluster that needs allocation, if any */
    7053         656 :         for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
    7054         600 :                 if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
    7055         452 :                         break;
    7056             :                 }
    7057             :         }
    7058             : 
    7059         508 :         if (ctx->cluster < _blob->active.num_clusters) {
    7060         452 :                 offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
    7061             : 
    7062             :                 /* We may safely increment a cluster before copying */
    7063         452 :                 ctx->cluster++;
    7064             : 
    7065             :                 /* Use a dummy 0B read as a context for cluster copy */
    7066         452 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7067         452 :                 cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
    7068         452 :                 cpl.u.blob_basic.cb_arg = ctx;
    7069             : 
    7070         452 :                 op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
    7071             :                                       NULL, 0, offset, 0);
    7072         452 :                 if (!op) {
    7073           0 :                         bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
    7074           0 :                         return;
    7075             :                 }
    7076             : 
    7077         452 :                 bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
    7078             :         } else {
                                       /* Every cluster has been visited: finalize the metadata */
    7079          56 :                 bs_inflate_blob_done(ctx);
    7080             :         }
    7081             : }
    7082             : 
    7083             : static void
    7084          60 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7085             : {
                               /*
                                * Open completion for the blob that is to be inflated / decoupled.
                                *
                                * Records the blob and its md_ro value in the context, takes the
                                * blob's locked_operation_in_progress flag, validates the request
                                * against the kind of parent, verifies that the blobstore has enough
                                * free clusters and then starts the per-cluster loop.
                                *
                                * cb_arg  - struct spdk_clone_snapshot_ctx of the operation
                                * _blob   - the opened blob (only used when bserrno == 0)
                                * bserrno - result of the open
                                */
    7086          60 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7087             :         uint64_t clusters_needed;
    7088             :         uint64_t i;
    7089             : 
    7090          60 :         if (bserrno != 0) {
    7091           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    7092           0 :                 return;
    7093             :         }
    7094             : 
    7095          60 :         ctx->original.blob = _blob;
    7096          60 :         ctx->original.md_ro = _blob->md_ro;
    7097             : 
    7098          60 :         if (_blob->locked_operation_in_progress) {
    7099           0 :                 SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
    7100           0 :                 ctx->bserrno = -EBUSY;
                                       /* The flag belongs to the other operation, so only close the blob */
    7101           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    7102           0 :                 return;
    7103             :         }
    7104             : 
    7105          60 :         _blob->locked_operation_in_progress = true;
    7106             : 
    7107          60 :         switch (_blob->parent_id) {
    7108           8 :         case SPDK_BLOBID_INVALID:
    7109           8 :                 if (!ctx->allocate_all) {
    7110             :                         /* This blob has no parent, so we cannot decouple it. */
    7111           4 :                         SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
    7112           4 :                         bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
    7113           4 :                         return;
    7114             :                 }
    7115           4 :                 break;
    7116           8 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    7117             :                 /*
    7118             :                  * It would be better to rely on back_bs_dev->is_zeroes(), to determine which
    7119             :                  * clusters require allocation. Until there is a blobstore consumer that
    7120             :                  * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes() it is not
    7121             :                  * worth the effort.
    7122             :                  */
    7123           8 :                 ctx->allocate_all = true;
    7124           8 :                 break;
    7125          44 :         default:
    7126          44 :                 break;
    7127             :         }
    7128             : 
    7129          56 :         if (spdk_blob_is_thin_provisioned(_blob) == false) {
    7130             :                 /* This is not thin provisioned blob. No need to inflate. */
    7131           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, 0);
    7132           0 :                 return;
    7133             :         }
    7134             : 
    7135             :         /* Do two passes - one to verify that we can obtain enough clusters
    7136             :          * and another to actually claim them.
    7137             :          */
    7138          56 :         clusters_needed = 0;
    7139         656 :         for (i = 0; i < _blob->active.num_clusters; i++) {
    7140         600 :                 if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
    7141         452 :                         clusters_needed++;
    7142             :                 }
    7143             :         }
    7144             : 
    7145          56 :         if (clusters_needed > _blob->bs->num_free_clusters) {
    7146             :                 /* Not enough free clusters. Cannot satisfy the request. */
    7147           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
    7148           0 :                 return;
    7149             :         }
    7150             : 
                               /* Second pass: start the allocating loop from the first cluster */
    7151          56 :         ctx->cluster = 0;
    7152          56 :         bs_inflate_blob_touch_next(ctx, 0);
    7153             : }
    7154             : 
    7155             : static void
    7156          60 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7157             :                 spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
    7158             : {
                               /*
                                * Common entry point of spdk_bs_inflate_blob() (allocate_all == true)
                                * and spdk_bs_blob_decouple_parent() (allocate_all == false).
                                *
                                * Allocates the operation context and opens the blob; the work
                                * continues in bs_inflate_blob_open_cpl(). If the context cannot be
                                * allocated, cb_fn is invoked synchronously with -ENOMEM.
                                *
                                * bs           - blobstore holding the blob
                                * channel      - I/O channel used for the cluster copies
                                * blobid       - blob to inflate / decouple
                                * allocate_all - see bs_cluster_needs_allocation()
                                * cb_fn/cb_arg - user completion and its argument
                                */
    7159          60 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    7160             : 
    7161          60 :         if (!ctx) {
    7162           0 :                 cb_fn(cb_arg, -ENOMEM);
    7163           0 :                 return;
    7164             :         }
                               /* Fill the union member that matches the BLOB_BASIC completion type */
    7165          60 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7166          60 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
    7167          60 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
    7168          60 :         ctx->bserrno = 0;
    7169          60 :         ctx->original.id = blobid;
    7170          60 :         ctx->channel = channel;
    7171          60 :         ctx->allocate_all = allocate_all;
    7172             : 
    7173          60 :         spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
    7174             : }
    7175             : 
    7176             : void
    7177          28 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7178             :                      spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7179             : {
                               /*
                                * Public entry point: inflate `blobid`. Runs bs_inflate_blob() with
                                * allocate_all == true; cb_fn(cb_arg, bserrno) reports the result.
                                */
    7180          28 :         bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
    7181          28 : }
    7182             : 
    7183             : void
    7184          32 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7185             :                              spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7186             : {
                               /*
                                * Public entry point: decouple `blobid` from its parent. Runs
                                * bs_inflate_blob() with allocate_all == false;
                                * cb_fn(cb_arg, bserrno) reports the result.
                                */
    7187          32 :         bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
    7188          32 : }
    7189             : /* END spdk_bs_inflate_blob */
    7190             : 
    7191             : /* START spdk_bs_blob_shallow_copy */
    7192             : 
    7193             : struct shallow_copy_ctx {
                               /*
                                * State of one spdk_bs_blob_shallow_copy() operation. Allocated in
                                * spdk_bs_blob_shallow_copy() and freed in
                                * bs_shallow_copy_cleanup_finish().
                                */
                               /* User completion, invoked once by bs_shallow_copy_cleanup_finish() */
    7194             :         struct spdk_bs_cpl cpl;
                               /* Result reported to the user completion */
    7195             :         int bserrno;
    7196             : 
    7197             :         /* Blob source for copy */
    7198             :         struct spdk_blob_store *bs;
    7199             :         spdk_blob_id blobid;
                               /* Opened source blob; stays NULL until the open completion has validated it */
    7200             :         struct spdk_blob *blob;
    7201             :         struct spdk_io_channel *blob_channel;
    7202             : 
    7203             :         /* Destination device for copy */
    7204             :         struct spdk_bs_dev *ext_dev;
    7205             :         struct spdk_io_channel *ext_channel;
    7206             : 
    7207             :         /* Current cluster for copy operation */
    7208             :         uint64_t cluster;
    7209             : 
    7210             :         /* Buffer for blob reading */
    7211             :         uint8_t *read_buff;
    7212             : 
    7213             :         /* Struct for external device writing */
    7214             :         struct spdk_bs_dev_cb_args ext_args;
    7215             : 
    7216             :         /* Actual number of copied clusters */
    7217             :         uint64_t copied_clusters_count;
    7218             : 
    7219             :         /* Status callback for updates about the ongoing operation */
    7220             :         spdk_blob_shallow_copy_status status_cb;
    7221             : 
    7222             :         /* Argument passed to function status_cb */
    7223             :         void *status_cb_arg;
    7224             : };
    7225             : 
    7226             : static void
    7227          16 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
    7228             : {
                               /*
                                * Terminal step of a shallow copy: releases the external device
                                * channel and the read buffer, invokes the user completion with
                                * ctx->bserrno and frees the context.
                                *
                                * cb_arg  - struct shallow_copy_ctx of the operation
                                * bserrno - result of the preceding step (a blob close, or 0 when
                                *           called directly); if non-zero it replaces ctx->bserrno
                                */
    7229          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7230          16 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    7231             : 
    7232          16 :         if (bserrno != 0) {
                                       /*
                                        * Log ctx->blobid rather than ctx->blob->id: ctx->blob is still
                                        * NULL when the blob was closed after a failed validation in
                                        * bs_shallow_copy_blob_open_cpl().
                                        */
    7233           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blobid, bserrno);
    7234           0 :                 ctx->bserrno = bserrno;
    7235             :         }
    7236             : 
    7237          16 :         ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
    7238          16 :         spdk_free(ctx->read_buff);
    7239             : 
    7240          16 :         cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    7241             : 
    7242          16 :         free(ctx);
    7243          16 : }
    7244             : 
    7245             : static void
    7246           8 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
    7247             : {
                               /*
                                * Completion of the write of one cluster to the external device.
                                * On success moves on to the next cluster; on failure releases the
                                * blob lock flag and closes the blob, which ends the operation.
                                *
                                * channel - unused here
                                * cb_arg  - struct shallow_copy_ctx of the operation
                                * bserrno - result of the write
                                */
    7248           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7249           8 :         struct spdk_blob *_blob = ctx->blob;
    7250             : 
    7251           8 :         if (bserrno != 0) {
    7252           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
    7253           0 :                 ctx->bserrno = bserrno;
    7254           0 :                 _blob->locked_operation_in_progress = false;
    7255           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7256           0 :                 return;
    7257             :         }
    7258             : 
    7259           8 :         ctx->cluster++;
                               /* copied_clusters_count is only maintained when a status callback is set */
    7260           8 :         if (ctx->status_cb) {
    7261           8 :                 ctx->copied_clusters_count++;
    7262           8 :                 ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
    7263             :         }
    7264             : 
    7265           8 :         bs_shallow_copy_cluster_find_next(ctx);
    7266             : }
    7267             : 
    7268             : static void
    7269           8 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
    7270             : {
                               /*
                                * Completion of the read of one cluster from the blob into
                                * ctx->read_buff. On success the buffer is written to the external
                                * device; on failure the blob lock flag is released and the blob is
                                * closed, which ends the operation.
                                *
                                * cb_arg  - struct shallow_copy_ctx of the operation
                                * bserrno - result of the read
                                */
    7271           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7272           8 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7273           8 :         struct spdk_blob *_blob = ctx->blob;
    7274             : 
    7275           8 :         if (bserrno != 0) {
    7276           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
    7277           0 :                 ctx->bserrno = bserrno;
    7278           0 :                 _blob->locked_operation_in_progress = false;
    7279           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7280           0 :                 return;
    7281             :         }
    7282             : 
    7283           8 :         ctx->ext_args.channel = ctx->ext_channel;
    7284           8 :         ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
    7285           8 :         ctx->ext_args.cb_arg = ctx;
    7286             : 
                               /*
                                * NOTE(review): offset and length are computed from the blobstore
                                * and its own device (_blob->bs, _blob->bs->dev) but handed to
                                * ext_dev->write(). The open completion accepts an ext_dev whose
                                * blocklen merely divides the blobstore blocklen - confirm the units
                                * are right when the two block sizes differ.
                                */
    7287           8 :         ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
    7288           8 :                        bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7289           8 :                        bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7290             :                        &ctx->ext_args);
    7291             : }
    7292             : 
    7293             : static void
    7294          12 : bs_shallow_copy_cluster_find_next(void *cb_arg)
    7295             : {
                               /*
                                * Advances ctx->cluster to the next cluster that is allocated in the
                                * blob itself and submits a read of that whole cluster into
                                * ctx->read_buff. When no allocated cluster is left, the copy is
                                * complete: the blob lock flag is released and the blob is closed.
                                *
                                * cb_arg - struct shallow_copy_ctx of the operation
                                */
    7296          12 :         struct shallow_copy_ctx *ctx = cb_arg;
    7297          12 :         struct spdk_blob *_blob = ctx->blob;
    7298             : 
                               /* Unallocated clusters (entry == 0) are skipped, not copied */
    7299          20 :         while (ctx->cluster < _blob->active.num_clusters) {
    7300          16 :                 if (_blob->active.clusters[ctx->cluster] != 0) {
    7301           8 :                         break;
    7302             :                 }
    7303             : 
    7304           8 :                 ctx->cluster++;
    7305             :         }
    7306             : 
    7307          12 :         if (ctx->cluster < _blob->active.num_clusters) {
    7308           8 :                 blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
    7309           8 :                                               bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7310           8 :                                               bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7311             :                                               bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
    7312             :         } else {
    7313           4 :                 _blob->locked_operation_in_progress = false;
    7314           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7315             :         }
    7316          12 : }
    7317             : 
    7318             : static void
    7319          16 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7320             : {
                               /*
                                * Open completion for the source blob of a shallow copy. Validates
                                * the request and, if everything is fine, takes the blob's
                                * locked_operation_in_progress flag and starts the cluster loop.
                                *
                                * Failures reported through ctx->bserrno:
                                *   open error - the blob could not be opened
                                *   -EPERM     - the blob is not read only
                                *   -EINVAL    - the external device is smaller than the blob, or
                                *                the blobstore block size is not a multiple of the
                                *                external device block size
                                *   -EBUSY     - another locked operation is in progress on the blob
                                *
                                * cb_arg  - struct shallow_copy_ctx of the operation
                                * _blob   - the opened blob (only used when bserrno == 0)
                                * bserrno - result of the open
                                */
    7321          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7322          16 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7323             :         uint32_t blob_block_size;
    7324             :         uint64_t blob_total_size;
    7325             : 
    7326          16 :         if (bserrno != 0) {
    7327           0 :                 SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
    7328           0 :                 ctx->bserrno = bserrno;
                                       /* Nothing was opened, so go straight to the final cleanup */
    7329           0 :                 bs_shallow_copy_cleanup_finish(ctx, 0);
    7330           0 :                 return;
    7331             :         }
    7332             : 
                               /*
                                * The three validation failures below close the blob before
                                * ctx->blob has been assigned, so ctx->blob is not set when their
                                * close callback runs.
                                */
    7333          16 :         if (!spdk_blob_is_read_only(_blob)) {
    7334           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
    7335           4 :                 ctx->bserrno = -EPERM;
    7336           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7337           4 :                 return;
    7338             :         }
    7339             : 
    7340          12 :         blob_block_size = _blob->bs->dev->blocklen;
    7341          12 :         blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
    7342             : 
    7343          12 :         if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
    7344           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must have at least blob size\n",
    7345             :                             _blob->id);
    7346           4 :                 ctx->bserrno = -EINVAL;
    7347           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7348           4 :                 return;
    7349             :         }
    7350             : 
    7351           8 :         if (blob_block_size % ext_dev->blocklen != 0) {
    7352           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible with \
    7353             : blobstore block size\n", _blob->id);
    7354           4 :                 ctx->bserrno = -EINVAL;
    7355           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7356           4 :                 return;
    7357             :         }
    7358             : 
    7359           4 :         ctx->blob = _blob;
    7360             : 
    7361           4 :         if (_blob->locked_operation_in_progress) {
    7362           0 :                 SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
    7363           0 :                 ctx->bserrno = -EBUSY;
                                       /* The flag belongs to the other operation, so it is left untouched */
    7364           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7365           0 :                 return;
    7366             :         }
    7367             : 
    7368           4 :         _blob->locked_operation_in_progress = true;
    7369             : 
    7370           4 :         ctx->cluster = 0;
    7371           4 :         bs_shallow_copy_cluster_find_next(ctx);
    7372             : }
    7373             : 
    7374             : int
    7375          16 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7376             :                           spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
    7377             :                           spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
    7378             :                           spdk_blob_op_complete cb_fn, void *cb_arg)
    7379             : {
                               /*
                                * Public entry point: copy every cluster that is allocated in blob
                                * `blobid` itself to the same offset of `ext_dev`.
                                *
                                * bs            - blobstore holding the blob
                                * channel       - I/O channel used to read the blob
                                * blobid        - source blob
                                * ext_dev       - destination device; dereferenced unconditionally
                                * status_cb_fn  - optional progress callback, called after each copied
                                *                 cluster with the number of clusters copied so far
                                * status_cb_arg - argument for status_cb_fn
                                * cb_fn/cb_arg  - completion callback and its argument
                                *
                                * Returns 0 when the operation was started; its outcome is then
                                * reported through cb_fn. Returns -ENOMEM when the context, the read
                                * buffer or the external device channel could not be created; cb_fn
                                * is not called in that case.
                                */
    7380             :         struct shallow_copy_ctx *ctx;
    7381             :         struct spdk_io_channel *ext_channel;
    7382             : 
    7383          16 :         ctx = calloc(1, sizeof(*ctx));
    7384          16 :         if (!ctx) {
    7385           0 :                 return -ENOMEM;
    7386             :         }
    7387             : 
    7388          16 :         ctx->bs = bs;
    7389          16 :         ctx->blobid = blobid;
                               /*
                                * Fill the union member that matches the BLOB_BASIC type; it is the
                                * member bs_shallow_copy_cleanup_finish() reads.
                                */
    7390          16 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7391          16 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
    7392          16 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
    7393          16 :         ctx->bserrno = 0;
    7394          16 :         ctx->blob_channel = channel;
    7395          16 :         ctx->status_cb = status_cb_fn;
    7396          16 :         ctx->status_cb_arg = status_cb_arg;
                               /* One cluster worth of DMA-able memory, reused for every cluster */
    7397          16 :         ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
    7398             :                                      SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    7399          16 :         if (!ctx->read_buff) {
    7400           0 :                 free(ctx);
    7401           0 :                 return -ENOMEM;
    7402             :         }
    7403             : 
    7404          16 :         ext_channel = ext_dev->create_channel(ext_dev);
    7405          16 :         if (!ext_channel) {
    7406           0 :                 spdk_free(ctx->read_buff);
    7407           0 :                 free(ctx);
    7408           0 :                 return -ENOMEM;
    7409             :         }
    7410          16 :         ctx->ext_dev = ext_dev;
    7411          16 :         ctx->ext_channel = ext_channel;
    7412             : 
    7413          16 :         spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
    7414             : 
    7415          16 :         return 0;
    7416             : }
    7417             : /* END spdk_bs_blob_shallow_copy */
    7418             : 
    7419             : /* START spdk_bs_blob_set_parent */
    7420             : 
    7421             : struct set_parent_ctx {
                               /* State of one set-parent operation; freed by bs_set_parent_cleanup_finish() */
    7422             :         struct spdk_blob_store *bs;
                               /* Result handed to cb_fn */
    7423             :         int                     bserrno;
                               /* User completion and its argument */
    7424             :         spdk_bs_op_complete     cb_fn;
    7425             :         void                    *cb_arg;
    7426             : 
                               /* Blob whose parent is being changed */
    7427             :         struct spdk_blob        *blob;
                               /* Value written back to blob->md_ro by bs_set_parent_close_blob() */
    7428             :         bool                    blob_md_ro;
    7429             : 
                               /* New parent; the code in this section uses parent.u.snapshot.{id,blob} */
    7430             :         struct blob_parent      parent;
    7431             : };
    7432             : 
    7433             : static void
    7434          24 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
    7435             : {
                               /*
                                * Terminal step of a set-parent operation: invokes the user
                                * completion with ctx->bserrno and frees the context.
                                *
                                * cb_arg  - struct set_parent_ctx of the operation
                                * bserrno - result of the preceding step; it is logged when non-zero
                                *           but only stored if no earlier error has been recorded
                                */
    7436          24 :         struct set_parent_ctx *ctx = cb_arg;
    7437             : 
    7438          24 :         assert(ctx != NULL);
    7439             : 
    7440          24 :         if (bserrno != 0) {
    7441           0 :                 SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
    7442           0 :                 if (ctx->bserrno == 0) {
    7443           0 :                         ctx->bserrno = bserrno;
    7444             :                 }
    7445             :         }
    7446             : 
    7447          24 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7448             : 
    7449          24 :         free(ctx);
    7450          24 : }
    7451             : 
    7452             : static void
    7453          20 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
    7454             : {
                               /*
                                * Close completion of the child blob.
                                *
                                * If the operation already failed (ctx->bserrno != 0), the snapshot
                                * opened for it is closed as well before finishing; the `bserrno` of
                                * the blob close is not examined on that path. Otherwise the
                                * snapshot is not closed here and the operation finishes directly.
                                *
                                * cb_arg  - struct set_parent_ctx of the operation
                                * bserrno - result of closing the child blob
                                */
    7455          20 :         struct set_parent_ctx *ctx = cb_arg;
    7456             : 
    7457          20 :         if (ctx->bserrno != 0) {
    7458           8 :                 spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
    7459           8 :                 return;
    7460             :         }
    7461             : 
    7462          12 :         if (bserrno != 0) {
    7463           0 :                 SPDK_ERRLOG("blob close error %d\n", bserrno);
    7464           0 :                 ctx->bserrno = bserrno;
    7465             :         }
    7466             : 
    7467          12 :         bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
    7468             : }
    7469             : 
    7470             : static void
    7471          12 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
    7472             : {
                               /*
                                * Runs after the metadata sync, or directly when setting the back
                                * device failed. Restores the child's md_ro flag, releases the lock
                                * flags of both blobs and closes the child blob; the operation
                                * continues in bs_set_parent_close_snapshot().
                                *
                                * cb_arg  - struct set_parent_ctx of the operation
                                * bserrno - result of the preceding step; recorded only if no
                                *           earlier error is stored in the context
                                */
    7473          12 :         struct set_parent_ctx *ctx = cb_arg;
    7474          12 :         struct spdk_blob *blob = ctx->blob;
    7475          12 :         struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
    7476             : 
    7477          12 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7478           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7479           0 :                 ctx->bserrno = bserrno;
    7480             :         }
    7481             : 
    7482             :         /* Revert md_ro to original state */
    7483          12 :         blob->md_ro = ctx->blob_md_ro;
    7484             : 
    7485          12 :         blob->locked_operation_in_progress = false;
    7486          12 :         snapshot->locked_operation_in_progress = false;
    7487             : 
    7488          12 :         spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7489          12 : }
    7490             : 
    7491             : static void
    7492          12 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
    7493             : {
                               /*
                                * Completion of blob_set_back_bs_dev(). On success the blob metadata
                                * is synced; on failure the error is recorded and the unwind starts
                                * immediately. Both paths continue in bs_set_parent_close_blob().
                                *
                                * cb_arg  - struct set_parent_ctx of the operation
                                * bserrno - result of setting the back device
                                */
    7494          12 :         struct set_parent_ctx *ctx = cb_arg;
    7495          12 :         struct spdk_blob *blob = ctx->blob;
    7496             : 
    7497          12 :         if (bserrno != 0) {
    7498           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7499           0 :                 ctx->bserrno = bserrno;
    7500           0 :                 bs_set_parent_close_blob(ctx, bserrno);
    7501           0 :                 return;
    7502             :         }
    7503             : 
    7504          12 :         spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
    7505             : }
    7506             : 
    7507             : static int
    7508          12 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7509             : {
                               /*
                                * Callback handed to blob_set_back_bs_dev(): rewrites the parent
                                * references of `blob` so that they point at the snapshot described
                                * by `parent`.
                                *
                                * Returns 0 on success, or the error from blob_set_xattr().
                                */
    7510             :         int rc;
    7511             : 
    7512          12 :         bs_blob_list_remove(blob);
    7513             : 
                               /*
                                * NOTE(review): if setting the xattr fails, the blob has already
                                * been removed from the blob list above and is not re-added before
                                * returning - confirm callers tolerate that.
                                */
    7514          12 :         rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
    7515          12 :         if (rc != 0) {
    7516           0 :                 SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
    7517           0 :                 return rc;
    7518             :         }
    7519          12 :         blob->parent_id = parent->u.snapshot.id;
    7520             : 
    7521          12 :         if (blob_is_esnap_clone(blob)) {
    7522             :                 /* Remove the xattr that references the external snapshot */
    7523           4 :                 blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7524           4 :                 blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7525             :         }
    7526             : 
                               /* Register the blob again, now with the new parent_id in place */
    7527          12 :         bs_blob_list_add(blob);
    7528             : 
    7529          12 :         return 0;
    7530             : }
    7531             : 
    7532             : static void
    7533          20 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    7534             : {
                               /*
                                * Open completion for the blob that is to become the new parent.
                                * Validates the pair (child, snapshot), takes the lock flag of both
                                * blobs and replaces the child's back device with one created from
                                * the snapshot.
                                *
                                * Failures reported through ctx->bserrno:
                                *   open error - the snapshot could not be opened
                                *   -EINVAL    - the parent is not a snapshot, or its number of
                                *                clusters differs from the child's
                                *   -EBUSY     - either blob has a locked operation in progress
                                *
                                * cb_arg   - struct set_parent_ctx of the operation
                                * snapshot - the opened parent blob (only used when bserrno == 0)
                                * bserrno  - result of the open
                                */
    7535          20 :         struct set_parent_ctx *ctx = cb_arg;
    7536          20 :         struct spdk_blob *blob = ctx->blob;
    7537             :         struct spdk_bs_dev *back_bs_dev;
    7538             : 
    7539          20 :         if (bserrno != 0) {
    7540           0 :                 SPDK_ERRLOG("snapshot open error %d\n", bserrno);
    7541           0 :                 ctx->bserrno = bserrno;
                                       /* No snapshot to close: close the child and finish */
    7542           0 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7543           0 :                 return;
    7544             :         }
    7545             : 
    7546          20 :         ctx->parent.u.snapshot.blob = snapshot;
    7547          20 :         ctx->parent.u.snapshot.id = snapshot->id;
    7548             : 
                               /*
                                * Each failure below sets ctx->bserrno and closes the child through
                                * bs_set_parent_close_snapshot(), which then closes the snapshot too.
                                */
    7549          20 :         if (!spdk_blob_is_snapshot(snapshot)) {
    7550           4 :                 SPDK_ERRLOG("parent blob is not a snapshot\n");
    7551           4 :                 ctx->bserrno = -EINVAL;
    7552           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7553           4 :                 return;
    7554             :         }
    7555             : 
    7556          16 :         if (blob->active.num_clusters != snapshot->active.num_clusters) {
    7557           4 :                 SPDK_ERRLOG("parent blob has a number of clusters different from child's ones\n");
    7558           4 :                 ctx->bserrno = -EINVAL;
    7559           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7560           4 :                 return;
    7561             :         }
    7562             : 
    7563          12 :         if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
    7564           0 :                 SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
    7565           0 :                 ctx->bserrno = -EBUSY;
    7566           0 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7567           0 :                 return;
    7568             :         }
    7569             : 
    7570          12 :         blob->locked_operation_in_progress = true;
    7571          12 :         snapshot->locked_operation_in_progress = true;
    7572             : 
    7573             :         /* Temporarily override md_ro flag for MD modification */
    7574          12 :         blob->md_ro = false;
    7575             : 
                               /* NOTE(review): the result is not checked here - confirm it cannot be NULL */
    7576          12 :         back_bs_dev = bs_create_blob_bs_dev(snapshot);
    7577             : 
    7578          12 :         blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
    7579             :                              bs_set_parent_set_back_bs_dev_done,
    7580             :                              ctx);
    7581             : }
    7582             : /* Open-completion for the child blob: requires thin provisioning, then opens the requested snapshot. */
    7583             : static void
    7584          24 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7585             : {
    7586          24 :         struct set_parent_ctx *ctx = cb_arg;
    7587             : 
    7588          24 :         if (bserrno != 0) {
    7589           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7590           0 :                 ctx->bserrno = bserrno;
    7591           0 :                 bs_set_parent_cleanup_finish(ctx, 0); /* nothing was opened, so finish directly */
    7592           0 :                 return;
    7593             :         }
    7594             : 
    7595          24 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7596           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7597           4 :                 ctx->bserrno = -EINVAL;
    7598           4 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7599           4 :                 return;
    7600             :         }
    7601             : 
    7602          20 :         ctx->blob = blob;
    7603          20 :         ctx->blob_md_ro = blob->md_ro; /* saved because bs_set_parent_snapshot_open_cpl clears md_ro */
    7604             : 
    7605          20 :         spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx); /* id set by spdk_bs_blob_set_parent */
    7606             : }
    7607             : /* Make snapshot_id the parent of blob_id; the result is delivered through cb_fn (argument errors and -ENOMEM synchronously). */
    7608             : void
    7609          36 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7610             :                         spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
    7611             : {
    7612             :         struct set_parent_ctx *ctx;
    7613             : 
    7614          36 :         if (snapshot_id == SPDK_BLOBID_INVALID) {
    7615           4 :                 SPDK_ERRLOG("snapshot id not valid\n");
    7616           4 :                 cb_fn(cb_arg, -EINVAL);
    7617           4 :                 return;
    7618             :         }
    7619             : 
    7620          32 :         if (blob_id == snapshot_id) {
    7621           4 :                 SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
    7622           4 :                 cb_fn(cb_arg, -EINVAL);
    7623           4 :                 return;
    7624             :         }
    7625             : 
    7626          28 :         if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
    7627           4 :                 SPDK_NOTICELOG("snapshot is already the parent of blob\n");
    7628           4 :                 cb_fn(cb_arg, -EEXIST); /* logged as a notice, but still reported as an error code */
    7629           4 :                 return;
    7630             :         }
    7631             : 
    7632          24 :         ctx = calloc(1, sizeof(*ctx)); /* handed to the callback chain; not freed in this function once allocated */
    7633          24 :         if (!ctx) {
    7634           0 :                 cb_fn(cb_arg, -ENOMEM);
    7635           0 :                 return;
    7636             :         }
    7637             : 
    7638          24 :         ctx->bs = bs;
    7639          24 :         ctx->parent.u.snapshot.id = snapshot_id; /* read back by bs_set_parent_blob_open_cpl */
    7640          24 :         ctx->cb_fn = cb_fn;
    7641          24 :         ctx->cb_arg = cb_arg;
    7642          24 :         ctx->bserrno = 0;
    7643             : 
    7644          24 :         spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx); /* the child blob is opened first */
    7645             : }
    7646             : /* END spdk_bs_blob_set_parent */
    7647             : 
    7648             : /* START spdk_bs_blob_set_external_parent */
    7649             : /* Last step of the external-parent flow: reports ctx->bserrno to the caller, then frees the copied esnap id and ctx. */
    7650             : static void
    7651          16 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
    7652             : {
    7653          16 :         struct set_parent_ctx *ctx = cb_arg;
    7654             : 
    7655          16 :         if (bserrno != 0) {
    7656           0 :                 SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
    7657           0 :                 if (ctx->bserrno == 0) { /* keep the first recorded error */
    7658           0 :                         ctx->bserrno = bserrno;
    7659             :                 }
    7660             :         }
    7661             : 
    7662          16 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7663             : 
    7664          16 :         free(ctx->parent.u.esnap.id); /* copy allocated in spdk_bs_blob_set_external_parent */
    7665          16 :         free(ctx);
    7666          16 : }
    7667             : /* Runs after the metadata sync (or directly on failure): restores md_ro, releases the operation flag, closes the blob. */
    7668             : static void
    7669           8 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
    7670             : {
    7671           8 :         struct set_parent_ctx *ctx = cb_arg;
    7672           8 :         struct spdk_blob *blob = ctx->blob;
    7673             : 
    7674           8 :         if (bserrno != 0 && ctx->bserrno == 0) { /* an earlier error takes precedence and is not logged again */
    7675           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7676           0 :                 ctx->bserrno = bserrno;
    7677             :         }
    7678             : 
    7679             :         /* Revert md_ro to original state */
    7680           8 :         blob->md_ro = ctx->blob_md_ro;
    7681             : 
    7682           8 :         blob->locked_operation_in_progress = false; /* set in bs_set_external_parent_blob_open_cpl */
    7683             : 
    7684           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7685           8 : }
    7686             : /* Completion passed to blob_set_back_bs_dev: on success syncs the metadata, on error goes straight to closing the blob. */
    7687             : static void
    7688           8 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
    7689             : {
    7690           8 :         struct set_parent_ctx *ctx = cb_arg;
    7691           8 :         struct spdk_blob *blob = ctx->blob;
    7692             : 
    7693           8 :         if (bserrno != 0) {
    7694           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7695           0 :                 ctx->bserrno = bserrno;
    7696           0 :                 bs_set_external_parent_close_blob(ctx, bserrno); /* skips the sync; ctx->bserrno is already set */
    7697           0 :                 return;
    7698             :         }
    7699             : 
    7700           8 :         spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
    7701             : }
    7702             : /* Callback handed to blob_set_back_bs_dev: repoints the blob's parent references at the external snapshot; returns 0 or the xattr error. */
    7703             : static int
    7704           8 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7705             : {
    7706             :         int rc;
    7707             : 
    7708           8 :         bs_blob_list_remove(blob); /* removed before the parent info changes, re-added at the end */
    7709             : 
    7710           8 :         if (spdk_blob_is_clone(blob)) {
    7711             :                 /* Remove the xattr that references the snapshot */
    7712           0 :                 blob->parent_id = SPDK_BLOBID_INVALID;
    7713           0 :                 blob_remove_xattr(blob, BLOB_SNAPSHOT, true); /* return value is ignored here */
    7714             :         }
    7715             : 
    7716           8 :         rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
    7717           8 :                             parent->u.esnap.id_len, true);
    7718           8 :         if (rc != 0) {
    7719           0 :                 SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
    7720           0 :                 return rc; /* NOTE(review): bs_blob_list_remove above is not undone on this path - confirm */
    7721             :         }
    7722           8 :         blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7723             : 
    7724           8 :         bs_blob_list_add(blob);
    7725             : 
    7726           8 :         return 0;
    7727             : }
    7728             : /* Open-completion for the blob getting an external parent: validates it, then installs the esnap device as back_bs_dev. */
    7729             : static void
    7730          16 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7731             : {
    7732          16 :         struct set_parent_ctx *ctx = cb_arg;
    7733          16 :         const void *esnap_id;
    7734          16 :         size_t esnap_id_len;
    7735             :         int rc;
    7736             : 
    7737          16 :         if (bserrno != 0) {
    7738           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7739           0 :                 ctx->bserrno = bserrno;
    7740           0 :                 bs_set_external_parent_cleanup_finish(ctx, 0); /* external finisher also frees the copied esnap id */
    7741           0 :                 return;
    7742             :         }
    7743             : 
    7744          16 :         ctx->blob = blob;
    7745          16 :         ctx->blob_md_ro = blob->md_ro; /* restored in bs_set_external_parent_close_blob */
    7746             : 
    7747          16 :         rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
    7748          16 :         if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len && /* outputs are read only when rc == 0 */
    7749           4 :             memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
    7750           4 :                 SPDK_ERRLOG("external snapshot is already the parent of blob\n");
    7751           4 :                 ctx->bserrno = -EEXIST;
    7752           4 :                 goto error;
    7753             :         }
    7754             : 
    7755          12 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7756           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7757           4 :                 ctx->bserrno = -EINVAL;
    7758           4 :                 goto error;
    7759             :         }
    7760             : 
    7761           8 :         if (blob->locked_operation_in_progress) {
    7762           0 :                 SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
    7763           0 :                 ctx->bserrno = -EBUSY;
    7764           0 :                 goto error;
    7765             :         }
    7766             : 
    7767           8 :         blob->locked_operation_in_progress = true; /* released in bs_set_external_parent_close_blob */
    7768             : 
    7769             :         /* Temporarily override md_ro flag for MD modification */
    7770           8 :         blob->md_ro = false;
    7771             : 
    7772           8 :         blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
    7773             :                              &ctx->parent, bs_set_external_parent_unfrozen, ctx);
    7774           8 :         return;
    7775             : 
    7776           8 : error: /* validation failed before anything was modified: just close the blob */
    7777           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7778             : }
    7779             : /* Attach an external snapshot device as parent of blob_id; the result is delivered through cb_fn (-EINVAL/-ENOMEM synchronously). */
    7780             : void
    7781          24 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7782             :                                  struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
    7783             :                                  uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
    7784             : {
    7785             :         struct set_parent_ctx *ctx;
    7786             :         uint64_t esnap_dev_size, cluster_sz;
    7787             : 
    7788          24 :         if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) { /* byte-wise id comparison */
    7789           4 :                 SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
    7790           4 :                 cb_fn(cb_arg, -EINVAL);
    7791           4 :                 return;
    7792             :         }
    7793             : 
    7794          20 :         esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen; /* NOTE(review): esnap_bs_dev is not NULL-checked - confirm contract */
    7795          20 :         cluster_sz = spdk_bs_get_cluster_size(bs);
    7796          20 :         if ((esnap_dev_size % cluster_sz) != 0) {
    7797           4 :                 SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
    7798             :                             "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
    7799           4 :                 cb_fn(cb_arg, -EINVAL);
    7800           4 :                 return;
    7801             :         }
    7802             : 
    7803          16 :         ctx = calloc(1, sizeof(*ctx));
    7804          16 :         if (!ctx) {
    7805           0 :                 cb_fn(cb_arg, -ENOMEM);
    7806           0 :                 return;
    7807             :         }
    7808             : 
    7809          16 :         ctx->parent.u.esnap.id = calloc(1, esnap_id_len); /* NOTE(review): with esnap_id_len == 0 calloc may return NULL -> -ENOMEM */
    7810          16 :         if (!ctx->parent.u.esnap.id) {
    7811           0 :                 free(ctx);
    7812           0 :                 cb_fn(cb_arg, -ENOMEM);
    7813           0 :                 return;
    7814             :         }
    7815             : 
    7816          16 :         ctx->bs = bs;
    7817          16 :         ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
    7818          16 :         memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len); /* private copy; freed in bs_set_external_parent_cleanup_finish */
    7819          16 :         ctx->parent.u.esnap.id_len = esnap_id_len;
    7820          16 :         ctx->cb_fn = cb_fn;
    7821          16 :         ctx->cb_arg = cb_arg;
    7822          16 :         ctx->bserrno = 0;
    7823             : 
    7824          16 :         spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
    7825             : }
    7826             : /* END spdk_bs_blob_set_external_parent */
    7827             : 
    7828             : /* START spdk_blob_resize */
    7829             : struct spdk_bs_resize_ctx { /* state carried through the freeze -> resize -> unfreeze callbacks */
    7830             :         spdk_blob_op_complete cb_fn; /* caller's completion */
    7831             :         void *cb_arg; /* passed back to cb_fn */
    7832             :         struct spdk_blob *blob; /* blob being resized */
    7833             :         uint64_t sz; /* requested size, handed to blob_resize() */
    7834             :         int rc; /* blob_resize() result, reported after unfreeze */
    7835             : };
    7836             : /* Completion of blob_unfreeze_io for a resize: releases the operation flag and reports the outcome to the caller. */
    7837             : static void
    7838         202 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
    7839             : {
    7840         202 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7841             : 
    7842         202 :         if (rc != 0) {
    7843           0 :                 SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
    7844             :         }
    7845             : 
    7846         202 :         if (ctx->rc != 0) { /* ctx->rc is the blob_resize() result stored by bs_resize_freeze_cpl */
    7847           4 :                 SPDK_ERRLOG("Resize failed, ctx->rc=%d\n", ctx->rc);
    7848           4 :                 rc = ctx->rc; /* a resize error takes precedence over the unfreeze status */
    7849             :         }
    7850             : 
    7851         202 :         ctx->blob->locked_operation_in_progress = false; /* set in spdk_blob_resize */
    7852             : 
    7853         202 :         ctx->cb_fn(ctx->cb_arg, rc);
    7854         202 :         free(ctx);
    7855         202 : }
    7856             : /* Completion of blob_freeze_io: performs the resize, then unfreezes regardless of the resize result. */
    7857             : static void
    7858         202 : bs_resize_freeze_cpl(void *cb_arg, int rc)
    7859             : {
    7860         202 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7861             : 
    7862         202 :         if (rc != 0) { /* freeze failed: report and release without resizing or unfreezing */
    7863           0 :                 ctx->blob->locked_operation_in_progress = false;
    7864           0 :                 ctx->cb_fn(ctx->cb_arg, rc);
    7865           0 :                 free(ctx);
    7866           0 :                 return;
    7867             :         }
    7868             : 
    7869         202 :         ctx->rc = blob_resize(ctx->blob, ctx->sz); /* result is reported by bs_resize_unfreeze_cpl */
    7870             : 
    7871         202 :         blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
    7872             : }
    7873             : /* Resize blob to sz (logged as clusters): the change is made between blob_freeze_io and blob_unfreeze_io; result goes to cb_fn. */
    7874             : void
    7875         216 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
    7876             : {
    7877             :         struct spdk_bs_resize_ctx *ctx;
    7878             : 
    7879         216 :         blob_verify_md_op(blob);
    7880             : 
    7881         216 :         SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
    7882             : 
    7883         216 :         if (blob->md_ro) { /* metadata is read-only: refuse */
    7884           4 :                 cb_fn(cb_arg, -EPERM);
    7885           4 :                 return;
    7886             :         }
    7887             : 
    7888         212 :         if (sz == blob->active.num_clusters) { /* already the requested size: succeed without freezing */
    7889          10 :                 cb_fn(cb_arg, 0);
    7890          10 :                 return;
    7891             :         }
    7892             : 
    7893         202 :         if (blob->locked_operation_in_progress) {
    7894           0 :                 cb_fn(cb_arg, -EBUSY);
    7895           0 :                 return;
    7896             :         }
    7897             : 
    7898         202 :         ctx = calloc(1, sizeof(*ctx)); /* freed by bs_resize_freeze_cpl or bs_resize_unfreeze_cpl */
    7899         202 :         if (!ctx) {
    7900           0 :                 cb_fn(cb_arg, -ENOMEM);
    7901           0 :                 return;
    7902             :         }
    7903             : 
    7904         202 :         blob->locked_operation_in_progress = true; /* taken only after the allocation succeeded */
    7905         202 :         ctx->cb_fn = cb_fn;
    7906         202 :         ctx->cb_arg = cb_arg;
    7907         202 :         ctx->blob = blob;
    7908         202 :         ctx->sz = sz;
    7909         202 :         blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
    7910             : }
    7911             : 
    7912             : /* END spdk_blob_resize */
    7913             : 
    7914             : 
    7915             : /* START spdk_bs_delete_blob */
    7916             : /* Close-completion that ends the delete sequence (cb_arg) with the given status. */
    7917             : static void
    7918        1488 : bs_delete_close_cpl(void *cb_arg, int bserrno)
    7919             : {
    7920        1488 :         spdk_bs_sequence_t *seq = cb_arg;
    7921             : 
    7922        1488 :         bs_sequence_finish(seq, bserrno);
    7923        1488 : }
    7924             : /* Sequence callback for blob deletion: frees the blob on error, otherwise closes it; either way seq is finished. */
    7925             : static void
    7926        1488 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7927             : {
    7928        1488 :         struct spdk_blob *blob = cb_arg;
    7929             : 
    7930        1488 :         if (bserrno != 0) {
    7931             :                 /*
    7932             :                  * We already removed this blob from the blobstore tailq, so
    7933             :                  *  we need to free it here since this is the last reference
    7934             :                  *  to it.
    7935             :                  */
    7936           0 :                 blob_free(blob);
    7937           0 :                 bs_delete_close_cpl(seq, bserrno); /* reuses the close callback to finish seq with the error */
    7938           0 :                 return;
    7939             :         }
    7940             : 
    7941             :         /*
    7942             :          * This will immediately decrement the ref_count and call
    7943             :          *  the completion routine since the metadata state is clean.
    7944             :          *  By calling spdk_blob_close, we reduce the number of call
    7945             :          *  points into code that touches the blob->open_ref count
    7946             :          *  and the blobstore's blob list.
    7947             :          */
    7948        1488 :         spdk_blob_close(blob, bs_delete_close_cpl, seq);
    7949             : }
    7950             : /* State shared by the delete_snapshot_* and delete_blob_cleanup_finish callbacks. */
    7951             : struct delete_snapshot_ctx {
    7952             :         struct spdk_blob_list *parent_snapshot_entry; /* NULL-checked by the callbacks; presumably the parent's list entry */
    7953             :         struct spdk_blob *snapshot; /* blob being deleted; also handed to cb_fn */
    7954             :         struct spdk_blob_md_page *page; /* released with spdk_free() in delete_blob_cleanup_finish */
    7955             :         bool snapshot_md_ro; /* snapshot's md_ro to restore */
    7956             :         struct spdk_blob *clone; /* the snapshot's clone */
    7957             :         bool clone_md_ro; /* clone's md_ro to restore */
    7958             :         spdk_blob_op_with_handle_complete cb_fn; /* caller's completion */
    7959             :         void *cb_arg; /* passed back to cb_fn */
    7960             :         int bserrno; /* error reported to cb_fn */
    7961             :         uint32_t next_extent_page;
    7962             : };
    7963             : /* Final cleanup step: hands ctx->snapshot and the recorded status to cb_fn, then frees ctx->page and ctx. */
    7964             : static void
    7965         110 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
    7966             : {
    7967         110 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7968             : 
    7969         110 :         if (bserrno != 0) {
    7970           0 :                 SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
    7971             :         }
    7972             : 
    7973         110 :         assert(ctx != NULL);
    7974             : 
    7975         110 :         if (bserrno != 0 && ctx->bserrno == 0) { /* keep the first recorded error */
    7976           0 :                 ctx->bserrno = bserrno;
    7977             :         }
    7978             : 
    7979         110 :         ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
    7980         110 :         spdk_free(ctx->page);
    7981         110 :         free(ctx);
    7982         110 : }
    7983             : /* Cleanup step run once the clone is closed: re-registers the snapshot as open on failure, restores its flags, closes it. */
    7984             : static void
    7985          22 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
    7986             : {
    7987          22 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7988             : 
    7989          22 :         if (bserrno != 0) {
    7990           0 :                 ctx->bserrno = bserrno; /* NOTE(review): overwrites any earlier error, unlike delete_blob_cleanup_finish */
    7991           0 :                 SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
    7992             :         }
    7993             : 
    7994          22 :         if (ctx->bserrno != 0) { /* presumably the snapshot was taken out of these structures earlier in the flow */
    7995          22 :                 assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
    7996          22 :                 RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
    7997          22 :                 spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
    7998             :         }
    7999             : 
    8000          22 :         ctx->snapshot->locked_operation_in_progress = false;
    8001          22 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro; /* restore the saved md_ro */
    8002             : 
    8003          22 :         spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
    8004          22 : }
    8005             : /* Cleanup step for the clone: releases its operation flag, restores md_ro and closes it; snapshot cleanup follows. */
    8006             : static void
    8007          12 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
    8008             : { /* bserrno is not inspected here; callers record failures in ctx->bserrno */
    8009          12 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8010             : 
    8011          12 :         ctx->clone->locked_operation_in_progress = false;
    8012          12 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8013             : 
    8014          12 :         spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8015          12 : }
    8016             : /* Completion of blob_unfreeze_io on the clone: on success releases and closes the clone, on error runs the full cleanup chain. */
    8017             : static void
    8018          48 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    8019             : {
    8020          48 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8021             : 
    8022          48 :         if (bserrno) {
    8023           0 :                 ctx->bserrno = bserrno;
    8024           0 :                 delete_snapshot_cleanup_clone(ctx, 0); /* error is carried in ctx->bserrno */
    8025           0 :                 return;
    8026             :         }
    8027             : 
    8028          48 :         ctx->clone->locked_operation_in_progress = false;
    8029          48 :         spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx); /* only the clone is closed on this path */
    8030             : }
    8031             : /* Completion of the snapshot metadata sync: updates the in-memory snapshot/clone lists, restores md_ro, unfreezes the clone. */
    8032             : static void
    8033          52 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
    8034             : {
    8035          52 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8036          52 :         struct spdk_blob_list *parent_snapshot_entry = NULL;
    8037          52 :         struct spdk_blob_list *snapshot_entry = NULL;
    8038          52 :         struct spdk_blob_list *clone_entry = NULL;
    8039          52 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8040             : 
    8041          52 :         if (bserrno) {
    8042           4 :                 SPDK_ERRLOG("Failed to sync MD on blob\n");
    8043           4 :                 ctx->bserrno = bserrno;
    8044           4 :                 delete_snapshot_cleanup_clone(ctx, 0); /* error is carried in ctx->bserrno */
    8045           4 :                 return;
    8046             :         }
    8047             : 
    8048             :         /* Get snapshot entry for the snapshot we want to remove */
    8049          48 :         snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
    8050             : 
    8051          48 :         assert(snapshot_entry != NULL);
    8052             : 
    8053             :         /* Remove clone entry in this snapshot (at this point there can be only one clone) */
    8054          48 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8055          48 :         assert(clone_entry != NULL);
    8056          48 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    8057          48 :         snapshot_entry->clone_count--;
    8058          48 :         assert(TAILQ_EMPTY(&snapshot_entry->clones));
    8059             : 
    8060          48 :         switch (ctx->snapshot->parent_id) {
    8061          40 :         case SPDK_BLOBID_INVALID:
    8062             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    8063             :                 /* No parent snapshot - just remove clone entry */
    8064          40 :                 free(clone_entry);
    8065          40 :                 break;
    8066           8 :         default:
    8067             :                 /* This snapshot is at the same time a clone of another snapshot - we need to
    8068             :                  * update parent snapshot (remove current clone, add new one inherited from
    8069             :                  * the snapshot that is being removed) */
    8070             : 
    8071             :                 /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8072             :                  * snapshot that we are removing */
    8073           8 :                 blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
    8074             :                                                     &snapshot_clone_entry);
    8075             : 
    8076             :                 /* Switch clone entry in parent snapshot */
    8077           8 :                 TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link); /* clone_entry is reused, not freed */
    8078           8 :                 TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
    8079           8 :                 free(snapshot_clone_entry); /* one entry added, one removed: the parent's clone_count is left untouched */
    8080             :         }
    8081             : 
    8082             :         /* Restore md_ro flags */
    8083          48 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8084          48 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8085             : 
    8086          48 :         blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
    8087             : }
    8088             : /* Completion of the clone metadata sync: clears snapshot clusters/extent pages the clone also references, then syncs the snapshot. */
    8089             : static void
    8090          56 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
    8091             : {
    8092          56 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8093             :         uint64_t i;
    8094             : 
    8095          56 :         ctx->snapshot->md_ro = false; /* needed on both paths below: each modifies and syncs the snapshot */
    8096             : 
    8097          56 :         if (bserrno) {
    8098           4 :                 SPDK_ERRLOG("Failed to sync MD on clone\n");
    8099           4 :                 ctx->bserrno = bserrno;
    8100             : 
    8101             :                 /* Restore snapshot to previous state */
    8102           4 :                 bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8103           4 :                 if (bserrno != 0) {
    8104           0 :                         delete_snapshot_cleanup_clone(ctx, bserrno); /* the sync error stays in ctx->bserrno */
    8105           0 :                         return;
    8106             :                 }
    8107             : 
    8108           4 :                 spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8109           4 :                 return;
    8110             :         }
    8111             : 
    8112             :         /* Clear cluster map entries for snapshot */
    8113         552 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8114         500 :                 if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
    8115         492 :                         if (ctx->snapshot->active.clusters[i] != 0) { /* 0 entries are not counted as allocated */
    8116         328 :                                 ctx->snapshot->active.num_allocated_clusters--;
    8117             :                         }
    8118         492 :                         ctx->snapshot->active.clusters[i] = 0;
    8119             :                 }
    8120             :         }
    8121          78 :         for (i = 0; i < ctx->snapshot->active.num_extent_pages && /* same treatment for extent pages */
    8122          52 :              i < ctx->clone->active.num_extent_pages; i++) {
    8123          26 :                 if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
    8124          24 :                         ctx->snapshot->active.extent_pages[i] = 0;
    8125             :                 }
    8126             :         }
    8127             : 
    8128          52 :         blob_set_thin_provision(ctx->snapshot);
    8129          52 :         ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY; /* so the sync below writes the changes */
    8130             : 
    8131          52 :         if (ctx->parent_snapshot_entry != NULL) {
    8132           8 :                 ctx->snapshot->back_bs_dev = NULL; /* presumably already handed to the clone - TODO confirm */
    8133             :         }
    8134             : 
    8135          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
    8136             : }
    8137             : 
                       /*
                        * Final step of the extent page walk (called from
                        * delete_snapshot_update_extent_pages() once no more pages need rewriting).
                        *
                        * Destroys the clone's current backing bs_dev and re-parents the clone onto
                        * one of three things, depending on what the snapshot being deleted was
                        * backed by: an external snapshot, a parent snapshot, or nothing (zeroes
                        * dev). It then syncs the clone's metadata; delete_snapshot_sync_clone_cpl
                        * is the completion of that sync.
                        *
                        * ctx - delete context; this function reads ctx->clone, ctx->snapshot and
                        *       ctx->parent_snapshot_entry and may set ctx->bserrno.
                        */
    8138             : static void
    8139          56 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
    8140             : {
    8141             :         int bserrno;
    8142             : 
    8143             :         /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
    8144          56 :         blob_back_bs_destroy(ctx->clone);
    8145             : 
    8146             :         /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
    8147          56 :         if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
                               /* ...to the external snapshot that was backing the snapshot */
    8148           8 :                 bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
    8149             :                                                  BLOB_EXTERNAL_SNAPSHOT_ID);
    8150           8 :                 if (bserrno != 0) {
                                       /* Keep the original failure in ctx->bserrno; the local
                                        * bserrno is reused below for the result of the rollback. */
    8151           0 :                         ctx->bserrno = bserrno;
    8152             : 
    8153             :                         /* Restore snapshot to previous state */
    8154           0 :                         bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8155           0 :                         if (bserrno != 0) {
    8156           0 :                                 delete_snapshot_cleanup_clone(ctx, bserrno);
    8157           0 :                                 return;
    8158             :                         }
    8159             : 
    8160           0 :                         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8161           0 :                         return;
    8162             :                 }
    8163           8 :                 ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    8164           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8165             :                 /* Do not delete the external snapshot along with this snapshot */
    8166           8 :                 ctx->snapshot->back_bs_dev = NULL;
    8167           8 :                 ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    8168          48 :         } else if (ctx->parent_snapshot_entry != NULL) {
    8169             :                 /* ...to parent snapshot */
                               /* NOTE(review): the blob_set_xattr() result is not checked here,
                                * unlike bs_snapshot_copy_xattr() above - confirm it cannot fail. */
    8170           8 :                 ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
    8171           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8172           8 :                 blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
    8173             :                                sizeof(spdk_blob_id),
    8174             :                                true);
    8175             :         } else {
    8176             :                 /* ...to blobid invalid and zeroes dev */
                               /* NOTE(review): neither the bs_create_zeroes_dev() result nor the
                                * blob_remove_xattr() result is checked - confirm this is safe. */
    8177          40 :                 ctx->clone->parent_id = SPDK_BLOBID_INVALID;
    8178          40 :                 ctx->clone->back_bs_dev = bs_create_zeroes_dev();
    8179          40 :                 blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
    8180             :         }
    8181             : 
    8182          56 :         spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
    8183             : }
    8184             : 
                       /*
                        * Walk the extent pages that snapshot and clone have in common, starting at
                        * ctx->next_extent_page, and make the clone's extent page table cover the
                        * snapshot's entries:
                        *  - snapshot has no page at index i: nothing to do;
                        *  - clone has no page at index i: the clone takes the snapshot's page number;
                        *  - both have a page: the clone's page is rewritten through
                        *    blob_write_extent_page(), with this function passed as the completion
                        *    callback so the walk resumes at ctx->next_extent_page.
                        * When the loop runs to the end, delete_snapshot_update_extent_pages_cpl()
                        * is called.
                        *
                        * cb_arg  - struct delete_snapshot_ctx *
                        * bserrno - status handed to this callback.
                        *           NOTE(review): it is never read in this function, so a failure
                        *           reported by blob_write_extent_page() is not acted upon here -
                        *           confirm whether that is intended.
                        */
    8185             : static void
    8186          58 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
    8187             : {
    8188          58 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8189             :         uint32_t *extent_page;
    8190             :         uint64_t i;
    8191             : 
    8192          84 :         for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
    8193          54 :              i < ctx->clone->active.num_extent_pages; i++) {
    8194          28 :                 if (ctx->snapshot->active.extent_pages[i] == 0) {
    8195             :                         /* No extent page to use from snapshot */
    8196           8 :                         continue;
    8197             :                 }
    8198             : 
    8199          20 :                 extent_page = &ctx->clone->active.extent_pages[i];
    8200          20 :                 if (*extent_page == 0) {
    8201             :                         /* Copy extent page from snapshot when clone did not have a matching one */
    8202          18 :                         *extent_page = ctx->snapshot->active.extent_pages[i];
    8203          18 :                         continue;
    8204             :                 }
    8205             : 
    8206             :                 /* Clone and snapshot both contain partially filled matching extent pages.
    8207             :                  * Update the clone extent page in place with cluster map containing the mix of both. */
                               /* Record where to resume before issuing the write; ctx->page is
                                * zeroed and handed to blob_write_extent_page() as the page buffer. */
    8208           2 :                 ctx->next_extent_page = i + 1;
    8209           2 :                 memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
    8210             : 
    8211           2 :                 blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
    8212             :                                        delete_snapshot_update_extent_pages, ctx);
    8213           2 :                 return;
    8214             :         }
    8215          56 :         delete_snapshot_update_extent_pages_cpl(ctx);
    8216             : }
    8217             : 
                       /*
                        * Completion callback of the snapshot metadata sync issued by
                        * delete_snapshot_freeze_io_cb() / delete_snapshot_esnap_channels_destroyed_cb().
                        *
                        * Saves the clone's md_ro flag in ctx->clone_md_ro and clears it. On error it
                        * records bserrno in ctx->bserrno and goes to delete_snapshot_cleanup_clone().
                        * On success every cluster map entry that is unallocated (0) in the clone is
                        * filled from the snapshot's map, and the extent page walk is started from
                        * page 0.
                        *
                        * cb_arg  - struct delete_snapshot_ctx *
                        * bserrno - result of the metadata sync
                        */
    8218             : static void
    8219          60 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
    8220             : {
    8221          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8222             :         uint64_t i;
    8223             : 
    8224             :         /* Temporarily override md_ro flag for clone for MD modification */
                       /* Done before the error check, so ctx->clone_md_ro is also populated when
                        * the error path below hands over to delete_snapshot_cleanup_clone(). */
    8225          60 :         ctx->clone_md_ro = ctx->clone->md_ro;
    8226          60 :         ctx->clone->md_ro = false;
    8227             : 
    8228          60 :         if (bserrno) {
    8229           4 :                 SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
    8230           4 :                 ctx->bserrno = bserrno;
    8231           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8232           4 :                 return;
    8233             :         }
    8234             : 
    8235             :         /* Copy snapshot map to clone map (only unallocated clusters in clone) */
                       /* Only indexes present in both maps are visited; num_allocated_clusters of
                        * the clone grows for every non-zero entry copied over. */
    8236         596 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8237         540 :                 if (ctx->clone->active.clusters[i] == 0) {
    8238         532 :                         ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
    8239         532 :                         if (ctx->clone->active.clusters[i] != 0) {
    8240         368 :                                 ctx->clone->active.num_allocated_clusters++;
    8241             :                         }
    8242             :                 }
    8243             :         }
    8244          56 :         ctx->next_extent_page = 0;
    8245          56 :         delete_snapshot_update_extent_pages(ctx, 0);
    8246             : }
    8247             : 
                       /*
                        * Callback passed to blob_esnap_destroy_bs_dev_channels() by
                        * delete_snapshot_freeze_io_cb(). A failure is only logged; in either case
                        * the snapshot's metadata is synced next, with
                        * delete_snapshot_sync_snapshot_xattr_cpl as the completion.
                        *
                        * cb_arg  - struct delete_snapshot_ctx *
                        * blob    - blob reported by the channel teardown; used for the log message
                        * bserrno - result of destroying the esnap channels
                        */
    8248             : static void
    8249           8 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8250             : {
    8251           8 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8252             : 
    8253           8 :         if (bserrno != 0) {
    8254           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
    8255             :                             blob->id, bserrno);
    8256             :                 /* That error should not stop us from syncing metadata. */
    8257             :         }
    8258             : 
    8259           8 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8260           8 : }
    8261             : 
                       /*
                        * Callback passed to blob_freeze_io() on the clone by
                        * delete_snapshot_open_clone_cb().
                        *
                        * Saves the snapshot's md_ro flag in ctx->snapshot_md_ro and clears it, then
                        * sets the SNAPSHOT_PENDING_REMOVAL xattr on the snapshot with the clone's id
                        * as its value. If the snapshot is an esnap clone, its bs_dev channels are
                        * destroyed first; otherwise the snapshot's metadata is synced right away.
                        * Either way delete_snapshot_sync_snapshot_xattr_cpl() runs after the sync.
                        * Both failure cases end in delete_snapshot_cleanup_clone() with the error
                        * stored in ctx->bserrno.
                        *
                        * cb_arg  - struct delete_snapshot_ctx *
                        * bserrno - result of freezing I/O on the clone
                        */
    8262             : static void
    8263          60 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
    8264             : {
    8265          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8266             : 
    8267          60 :         if (bserrno) {
    8268           0 :                 SPDK_ERRLOG("Failed to freeze I/O on clone\n");
    8269           0 :                 ctx->bserrno = bserrno;
    8270           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8271           0 :                 return;
    8272             :         }
    8273             : 
    8274             :         /* Temporarily override md_ro flag for snapshot for MD modification */
    8275          60 :         ctx->snapshot_md_ro = ctx->snapshot->md_ro;
    8276          60 :         ctx->snapshot->md_ro = false;
    8277             : 
    8278             :         /* Mark blob as pending for removal for power failure safety, use clone id for recovery */
    8279          60 :         ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
    8280             :                                       sizeof(spdk_blob_id), true);
    8281          60 :         if (ctx->bserrno != 0) {
    8282           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8283           0 :                 return;
    8284             :         }
    8285             : 
    8286          60 :         if (blob_is_esnap_clone(ctx->snapshot)) {
                               /* The metadata sync is issued from the channel-destroy callback. */
    8287           8 :                 blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
    8288             :                                                    delete_snapshot_esnap_channels_destroyed_cb,
    8289             :                                                    ctx);
    8290           8 :                 return;
    8291             :         }
    8292             : 
    8293          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8294             : }
    8295             : 
                       /*
                        * Callback of the spdk_bs_open_blob() issued for the snapshot's clone by
                        * update_clone_on_snapshot_deletion().
                        *
                        * - Open failed: store the error in ctx->bserrno and go to
                        *   delete_snapshot_cleanup_snapshot().
                        * - Clone already has a locked operation in progress: set ctx->bserrno to
                        *   -EBUSY and close the clone, with delete_snapshot_cleanup_snapshot as the
                        *   close completion.
                        * - Otherwise: mark the clone as having a locked operation in progress and
                        *   freeze its I/O; delete_snapshot_freeze_io_cb() continues from there.
                        *
                        * cb_arg  - struct delete_snapshot_ctx *
                        * clone   - opened clone handle (stored in ctx->clone on success)
                        * bserrno - result of the open
                        */
    8296             : static void
    8297          70 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
    8298             : {
    8299          70 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8300             : 
    8301          70 :         if (bserrno) {
    8302          10 :                 SPDK_ERRLOG("Failed to open clone\n");
    8303          10 :                 ctx->bserrno = bserrno;
    8304          10 :                 delete_snapshot_cleanup_snapshot(ctx, 0);
    8305          10 :                 return;
    8306             :         }
    8307             : 
    8308          60 :         ctx->clone = clone;
    8309             : 
    8310          60 :         if (clone->locked_operation_in_progress) {
    8311           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
    8312           0 :                 ctx->bserrno = -EBUSY;
    8313           0 :                 spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8314           0 :                 return;
    8315             :         }
    8316             : 
    8317          60 :         clone->locked_operation_in_progress = true;
    8318             : 
    8319          60 :         blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
    8320             : }
    8321             : 
                       /*
                        * Start the "snapshot with one clone" deletion path: find the snapshot's
                        * single clone in the blobstore's snapshot list, record the snapshot's own
                        * parent snapshot entry in ctx->parent_snapshot_entry, and open the clone.
                        * delete_snapshot_open_clone_cb() continues once the open completes.
                        *
                        * snapshot - snapshot being deleted; the asserts below expect it to have a
                        *            snapshot list entry with exactly one clone
                        * ctx      - delete context passed on to the open callback
                        */
    8322             : static void
    8323          70 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
    8324             : {
    8325          70 :         struct spdk_blob_list *snapshot_entry = NULL;
    8326          70 :         struct spdk_blob_list *clone_entry = NULL;
    8327          70 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8328             : 
    8329             :         /* Get snapshot entry for the snapshot we want to remove */
    8330          70 :         snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
    8331             : 
    8332          70 :         assert(snapshot_entry != NULL);
    8333             : 
    8334             :         /* Get clone of the snapshot (at this point there can be only one clone) */
    8335          70 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8336          70 :         assert(snapshot_entry->clone_count == 1);
    8337          70 :         assert(clone_entry != NULL);
    8338             : 
    8339             :         /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8340             :          * snapshot that we are removing */
                       /* NOTE(review): snapshot_clone_entry is filled in here but not read again
                        * in this function; only ctx->parent_snapshot_entry is kept. */
    8341          70 :         blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
    8342             :                                             &snapshot_clone_entry);
    8343             : 
    8344          70 :         spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
    8345          70 : }
    8346             : 
                       /*
                        * Last in-memory step of blob deletion. Installed as ctx->cb_fn by
                        * bs_delete_open_cpl() and also called directly from there when the blob has
                        * no clone.
                        *
                        * On error the delete sequence is finished with that error. Otherwise the
                        * blob's snapshot list entry (if it has one) is unlinked and freed, its bit
                        * in used_blobids is cleared, the blob is marked dirty with zero metadata
                        * pages and resized to 0, and the result is persisted with
                        * bs_delete_persist_cpl as the completion.
                        *
                        * cb_arg  - the delete sequence (spdk_bs_sequence_t *)
                        * blob    - blob being deleted
                        * bserrno - status of the preceding steps
                        */
    8347             : static void
    8348        1550 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8349             : {
    8350        1550 :         spdk_bs_sequence_t *seq = cb_arg;
    8351        1550 :         struct spdk_blob_list *snapshot_entry = NULL;
    8352             :         uint32_t page_num;
    8353             : 
    8354        1550 :         if (bserrno) {
    8355          62 :                 SPDK_ERRLOG("Failed to remove blob\n");
    8356          62 :                 bs_sequence_finish(seq, bserrno);
    8357          62 :                 return;
    8358             :         }
    8359             : 
    8360             :         /* Remove snapshot from the list */
    8361        1488 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8362        1488 :         if (snapshot_entry != NULL) {
    8363         140 :                 TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
    8364         140 :                 free(snapshot_entry);
    8365             :         }
    8366             : 
                       /* NOTE(review): the blob_resize() result below is ignored - confirm that
                        * resizing to 0 cannot fail. */
    8367        1488 :         page_num = bs_blobid_to_page(blob->id);
    8368        1488 :         spdk_bit_array_clear(blob->bs->used_blobids, page_num);
    8369        1488 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8370        1488 :         blob->active.num_pages = 0;
    8371        1488 :         blob_resize(blob, 0);
    8372             : 
    8373        1488 :         blob_persist(seq, blob, bs_delete_persist_cpl, blob);
    8374             : }
    8375             : 
                       /*
                        * Decide whether `blob` may be deleted right now.
                        *
                        * blob         - blob opened by the delete path
                        * update_clone - out: set to true when the blob is a snapshot with exactly
                        *                one clone that must be updated first, false when it can be
                        *                removed directly. Not written on the -EBUSY paths.
                        *
                        * Returns 0 when deletion may proceed, -EBUSY when the blob is a snapshot
                        * with more than one clone or is held open by someone else.
                        */
    8376             : static int
    8377        1550 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
    8378             : {
    8379        1550 :         struct spdk_blob_list *snapshot_entry = NULL;
    8380        1550 :         struct spdk_blob_list *clone_entry = NULL;
    8381        1550 :         struct spdk_blob *clone = NULL;
    8382        1550 :         bool has_one_clone = false;
    8383             : 
    8384             :         /* Check if this is a snapshot with clones */
    8385        1550 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8386        1550 :         if (snapshot_entry != NULL) {
    8387         190 :                 if (snapshot_entry->clone_count > 1) {
    8388          24 :                         SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
    8389          24 :                         return -EBUSY;
    8390         166 :                 } else if (snapshot_entry->clone_count == 1) {
    8391          70 :                         has_one_clone = true;
    8392             :                 }
    8393             :         }
    8394             : 
    8395             :         /* Check if someone has this blob open (besides this delete context):
    8396             :          * - open_ref = 1 - only this context opened blob, so it is ok to remove it
    8397             :          * - open_ref <= 2 && has_one_clone = true - clone is holding snapshot
    8398             :          *      and that is ok, because we will update it accordingly */
    8399        1526 :         if (blob->open_ref <= 2 && has_one_clone) {
    8400          70 :                 clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8401          70 :                 assert(clone_entry != NULL);
                               /* A non-NULL lookup result is taken to mean the clone is open, which
                                * accounts for the second reference on the snapshot. */
    8402          70 :                 clone = blob_lookup(blob->bs, clone_entry->id);
    8403             : 
    8404          70 :                 if (blob->open_ref == 2 && clone == NULL) {
    8405             :                         /* Clone is closed and someone else opened this blob */
    8406           0 :                         SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8407           0 :                         return -EBUSY;
    8408             :                 }
    8409             : 
    8410          70 :                 *update_clone = true;
    8411          70 :                 return 0;
    8412             :         }
    8413             : 
                       /* NOTE(review): this path is reached for any blob with open_ref > 1, not
                        * only for snapshots, although the message below says "snapshot". */
    8414        1456 :         if (blob->open_ref > 1) {
    8415          16 :                 SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8416          16 :                 return -EBUSY;
    8417             :         }
    8418             : 
    8419        1440 :         assert(has_one_clone == false);
    8420        1440 :         *update_clone = false;
    8421        1440 :         return 0;
    8422             : }
    8423             : 
                       /*
                        * Close completion used by bs_delete_open_cpl() when allocating the delete
                        * context failed. Finishes the delete sequence with -ENOMEM; the result of
                        * the close itself (bserrno) is not looked at.
                        *
                        * cb_arg - the delete sequence (spdk_bs_sequence_t *)
                        */
    8424             : static void
    8425           0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
    8426             : {
    8427           0 :         spdk_bs_sequence_t *seq = cb_arg;
    8428             : 
    8429           0 :         bs_sequence_finish(seq, -ENOMEM);
    8430           0 : }
    8431             : 
                       /*
                        * Completion of the spdk_bs_open_blob() issued by spdk_bs_delete_blob().
                        *
                        * Allocates the delete context, checks that the blob may be deleted, marks
                        * it as having a locked operation in progress and removes it from the
                        * blobstore's open-blob bookkeeping. Then either starts the clone update
                        * path (snapshot with one clone) or finishes the deletion directly.
                        *
                        * Failures after a successful open close the blob again; the error travels
                        * in ctx->bserrno to delete_blob_cleanup_finish, or as -ENOMEM through
                        * bs_delete_enomem_close_cpl when the context itself could not be allocated.
                        *
                        * cb_arg  - the delete sequence (spdk_bs_sequence_t *)
                        * blob    - opened blob handle (only used when bserrno == 0)
                        * bserrno - result of the open
                        */
    8432             : static void
    8433        1560 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8434             : {
    8435        1560 :         spdk_bs_sequence_t *seq = cb_arg;
    8436             :         struct delete_snapshot_ctx *ctx;
    8437        1560 :         bool update_clone = false;
    8438             : 
    8439        1560 :         if (bserrno != 0) {
    8440          10 :                 bs_sequence_finish(seq, bserrno);
    8441          10 :                 return;
    8442             :         }
    8443             : 
    8444        1550 :         blob_verify_md_op(blob);
    8445             : 
    8446        1550 :         ctx = calloc(1, sizeof(*ctx));
    8447        1550 :         if (ctx == NULL) {
    8448           0 :                 spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
    8449           0 :                 return;
    8450             :         }
    8451             : 
    8452        1550 :         ctx->snapshot = blob;
    8453        1550 :         ctx->cb_fn = bs_delete_blob_finish;
    8454        1550 :         ctx->cb_arg = seq;
    8455             : 
    8456             :         /* Check if blob can be removed and if it is a snapshot with clone on top of it */
    8457        1550 :         ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
    8458        1550 :         if (ctx->bserrno) {
    8459          40 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8460          40 :                 return;
    8461             :         }
    8462             : 
    8463        1510 :         if (blob->locked_operation_in_progress) {
    8464           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
    8465           0 :                 ctx->bserrno = -EBUSY;
    8466           0 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8467           0 :                 return;
    8468             :         }
    8469             : 
    8470        1510 :         blob->locked_operation_in_progress = true;
    8471             : 
    8472             :         /*
    8473             :          * Remove the blob from the blob_store list now, to ensure it does not
    8474             :          *  get returned after this point by blob_lookup().
    8475             :          */
    8476        1510 :         spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    8477        1510 :         RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8478             : 
    8479        1510 :         if (update_clone) {
                               /* One page-sized DMA buffer for the delete context (ctx->page). */
    8480          70 :                 ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    8481          70 :                 if (!ctx->page) {
                                       /* NOTE(review): at this point the blob has already been
                                        * removed from open_blobs/open_blobids and flagged as locked;
                                        * confirm the close/cleanup path copes with that state. */
    8482           0 :                         ctx->bserrno = -ENOMEM;
    8483           0 :                         spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8484           0 :                         return;
    8485             :                 }
    8486             :                 /* This blob is a snapshot with active clone - update clone first */
    8487          70 :                 update_clone_on_snapshot_deletion(blob, ctx);
    8488             :         } else {
    8489             :                 /* This blob does not have any clones - just remove it */
                               /* ctx is not handed to bs_delete_blob_finish() (it receives seq),
                                * so it is released here. */
    8490        1440 :                 bs_blob_list_remove(blob);
    8491        1440 :                 bs_delete_blob_finish(seq, blob, 0);
    8492        1440 :                 free(ctx);
    8493             :         }
    8494             : }
    8495             : 
                       /*
                        * Delete the blob identified by `blobid`.
                        *
                        * Asserts that the caller is on the blobstore's metadata thread. Starts a
                        * sequence on the metadata channel whose completion is cb_fn(cb_arg, status),
                        * then opens the blob; bs_delete_open_cpl() carries on from there.
                        *
                        * bs     - blobstore that owns the blob
                        * blobid - id of the blob to delete
                        * cb_fn  - completion callback; called directly with -ENOMEM if the
                        *          sequence cannot be started
                        * cb_arg - argument passed to cb_fn
                        */
    8496             : void
    8497        1560 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8498             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    8499             : {
    8500        1560 :         struct spdk_bs_cpl      cpl;
    8501             :         spdk_bs_sequence_t      *seq;
    8502             : 
    8503        1560 :         SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
    8504             : 
    8505        1560 :         assert(spdk_get_thread() == bs->md_thread);
    8506             : 
    8507        1560 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8508        1560 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8509        1560 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8510             : 
    8511        1560 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8512        1560 :         if (!seq) {
    8513           0 :                 cb_fn(cb_arg, -ENOMEM);
    8514           0 :                 return;
    8515             :         }
    8516             : 
    8517        1560 :         spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
    8518             : }
    8519             : 
    8520             : /* END spdk_bs_delete_blob */
    8521             : 
    8522             : /* START spdk_bs_open_blob */
    8523             : 
                       /*
                        * Completion of the blob_load() started by bs_open_blob().
                        *
                        * - Load failed: free the freshly allocated blob, clear the handle stored in
                        *   the sequence's completion and finish with the error.
                        * - A blob with the same id is already registered (presumably opened while
                        *   this load was in flight - TODO confirm): free the new blob and hand out
                        *   the existing one with its open_ref incremented.
                        * - Otherwise: take the first reference, register the blob in open_blobids
                        *   and the open_blobs tree, and finish the sequence.
                        *
                        * seq     - open sequence; its completion carries the blob handle
                        * cb_arg  - the blob that was loaded (struct spdk_blob *)
                        * bserrno - result of the load
                        */
    8524             : static void
    8525        3466 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8526             : {
    8527        3466 :         struct spdk_blob *blob = cb_arg;
    8528             :         struct spdk_blob *existing;
    8529             : 
    8530        3466 :         if (bserrno != 0) {
    8531          64 :                 blob_free(blob);
    8532          64 :                 seq->cpl.u.blob_handle.blob = NULL;
    8533          64 :                 bs_sequence_finish(seq, bserrno);
    8534          64 :                 return;
    8535             :         }
    8536             : 
    8537        3402 :         existing = blob_lookup(blob->bs, blob->id);
    8538        3402 :         if (existing) {
    8539           4 :                 blob_free(blob);
    8540           4 :                 existing->open_ref++;
    8541           4 :                 seq->cpl.u.blob_handle.blob = existing;
    8542           4 :                 bs_sequence_finish(seq, 0);
    8543           4 :                 return;
    8544             :         }
    8545             : 
    8546        3398 :         blob->open_ref++;
    8547             : 
    8548        3398 :         spdk_bit_array_set(blob->bs->open_blobids, blob->id);
    8549        3398 :         RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8550             : 
                       /* bserrno is 0 here; the error case returned above. */
    8551        3398 :         bs_sequence_finish(seq, bserrno);
    8552             : }
    8553             : 
                       /*
                        * Copy caller-supplied open options into `dst`, field by field. A field is
                        * copied only when it lies entirely within the first src->opts_size bytes of
                        * the caller's structure; other fields of `dst` keep whatever value they
                        * already had. dst->opts_size is set to src->opts_size.
                        *
                        * src - options provided by the caller; src->opts_size tells how much of the
                        *       structure the caller filled in
                        * dst - destination, initialised by the caller beforehand (bs_open_blob()
                        *       runs spdk_blob_open_opts_init() on it first)
                        */
    8554             : static inline void
    8555           4 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
    8556             : {
                       /* True when `field` ends at or before src->opts_size.
                        * NOTE(review): the expansion is not wrapped in parentheses. */
    8557             : #define FIELD_OK(field) \
    8558             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
    8559             : 
                       /* Copy one field when FIELD_OK() holds.
                        * NOTE(review): expands to a bare if statement (no do/while(0) wrapper) and
                        * its last line ends in a continuation backslash, so the blank line that
                        * follows belongs to the macro. */
    8560             : #define SET_FIELD(field) \
    8561             :         if (FIELD_OK(field)) { \
    8562             :                 dst->field = src->field; \
    8563             :         } \
    8564             : 
    8565           4 :         SET_FIELD(clear_method);
    8566           4 :         SET_FIELD(esnap_ctx);
    8567             : 
    8568           4 :         dst->opts_size = src->opts_size;
    8569             : 
    8570             :         /* You should not remove this statement, but need to update the assert statement
    8571             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    8572             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
    8573             : 
    8574             : #undef FIELD_OK
    8575             : #undef SET_FIELD
    8576           4 : }
    8577             : 
                       /*
                        * Common implementation behind spdk_bs_open_blob() and
                        * spdk_bs_open_blob_ext().
                        *
                        * Asserts that the caller is on the blobstore's metadata thread. The early
                        * exits call cb_fn directly:
                        *  - -ENOENT when the blob id's page is not marked in used_blobids;
                        *  - success with the existing handle (open_ref incremented) when the blob
                        *    is already open;
                        *  - -ENOMEM when the blob or the sequence cannot be allocated.
                        * Otherwise a new blob is allocated and loaded; bs_open_blob_cpl() completes
                        * the open.
                        *
                        * bs     - blobstore to open the blob in
                        * blobid - id of the blob to open
                        * opts   - optional open options; NULL means the defaults set by
                        *          spdk_blob_open_opts_init()
                        * cb_fn  - completion callback receiving the blob handle and status
                        * cb_arg - argument passed to cb_fn
                        */
    8578             : static void
    8579        4263 : bs_open_blob(struct spdk_blob_store *bs,
    8580             :              spdk_blob_id blobid,
    8581             :              struct spdk_blob_open_opts *opts,
    8582             :              spdk_blob_op_with_handle_complete cb_fn,
    8583             :              void *cb_arg)
    8584             : {
    8585             :         struct spdk_blob                *blob;
    8586        4263 :         struct spdk_bs_cpl              cpl;
    8587        4263 :         struct spdk_blob_open_opts      opts_local;
    8588             :         spdk_bs_sequence_t              *seq;
    8589             :         uint32_t                        page_num;
    8590             : 
    8591        4263 :         SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
    8592        4263 :         assert(spdk_get_thread() == bs->md_thread);
    8593             : 
    8594        4263 :         page_num = bs_blobid_to_page(blobid);
    8595        4263 :         if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
    8596             :                 /* Invalid blobid */
    8597          48 :                 cb_fn(cb_arg, NULL, -ENOENT);
    8598          48 :                 return;
    8599             :         }
    8600             : 
                       /* Already open: share the existing handle instead of loading it again. */
    8601        4215 :         blob = blob_lookup(bs, blobid);
    8602        4215 :         if (blob) {
    8603         749 :                 blob->open_ref++;
    8604         749 :                 cb_fn(cb_arg, blob, 0);
    8605         749 :                 return;
    8606             :         }
    8607             : 
    8608        3466 :         blob = blob_alloc(bs, blobid);
    8609        3466 :         if (!blob) {
    8610           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8611           0 :                 return;
    8612             :         }
    8613             : 
                       /* Start from defaults, then overlay whatever the caller provided. */
    8614        3466 :         spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
    8615        3466 :         if (opts) {
    8616           4 :                 blob_open_opts_copy(opts, &opts_local);
    8617             :         }
    8618             : 
    8619        3466 :         blob->clear_method = opts_local.clear_method;
    8620             : 
    8621        3466 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
    8622        3466 :         cpl.u.blob_handle.cb_fn = cb_fn;
    8623        3466 :         cpl.u.blob_handle.cb_arg = cb_arg;
    8624        3466 :         cpl.u.blob_handle.blob = blob;
    8625        3466 :         cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
    8626             : 
    8627        3466 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8628        3466 :         if (!seq) {
    8629           0 :                 blob_free(blob);
    8630           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8631           0 :                 return;
    8632             :         }
    8633             : 
    8634        3466 :         blob_load(seq, blob, bs_open_blob_cpl, blob);
    8635             : }
    8636             : 
                       /*
                        * Open a blob with default open options. Forwards to bs_open_blob() with
                        * opts == NULL; cb_fn(cb_arg, blob, status) reports the result.
                        */
    8637             : void
    8638        4259 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8639             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8640             : {
    8641        4259 :         bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
    8642        4259 : }
    8643             : 
                       /*
                        * Open a blob with caller-supplied open options. Forwards all arguments to
                        * bs_open_blob(), which accepts opts == NULL and then uses the defaults;
                        * cb_fn(cb_arg, blob, status) reports the result.
                        */
    8644             : void
    8645           4 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8646             :                       struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8647             : {
    8648           4 :         bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
    8649           4 : }
    8650             : 
    8651             : /* END spdk_bs_open_blob */
    8652             : 
    8653             : /* START spdk_blob_set_read_only */
                       /*
                        * Request that `blob` become read-only: sets SPDK_BLOB_READ_ONLY in
                        * data_ro_flags and marks the blob dirty. The in-memory data_ro/md_ro flags
                        * are not touched here; blob_sync_md_cpl() sets them after a successful
                        * metadata sync.
                        *
                        * Always returns 0.
                        */
    8654             : int
    8655         232 : spdk_blob_set_read_only(struct spdk_blob *blob)
    8656             : {
    8657         232 :         blob_verify_md_op(blob);
    8658             : 
    8659         232 :         blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
    8660             : 
    8661         232 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8662         232 :         return 0;
    8663             : }
    8664             : /* END spdk_blob_set_read_only */
    8665             : 
    8666             : /* START spdk_blob_sync_md */
    8667             : 
                       /*
                        * Completion of the blob_persist() started by blob_sync_md(). If the persist
                        * succeeded and the blob carries SPDK_BLOB_READ_ONLY in data_ro_flags, the
                        * blob becomes read-only in memory (data_ro and md_ro set). The sequence is
                        * then finished with the persist status.
                        *
                        * seq     - sync sequence
                        * cb_arg  - the blob that was persisted (struct spdk_blob *)
                        * bserrno - result of the persist
                        */
    8668             : static void
    8669        1591 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8670             : {
    8671        1591 :         struct spdk_blob *blob = cb_arg;
    8672             : 
    8673        1591 :         if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
    8674         400 :                 blob->data_ro = true;
    8675         400 :                 blob->md_ro = true;
    8676             :         }
    8677             : 
    8678        1591 :         bs_sequence_finish(seq, bserrno);
    8679        1591 : }
    8680             : 
                       /*
                        * Persist the blob's metadata without the md_ro short-circuit that
                        * spdk_blob_sync_md() applies. Starts a sequence on the blobstore's metadata
                        * channel whose completion is cb_fn(cb_arg, status) and persists the blob,
                        * with blob_sync_md_cpl as the persist completion.
                        *
                        * blob   - blob whose metadata is written
                        * cb_fn  - completion callback; called directly with -ENOMEM if the
                        *          sequence cannot be started
                        * cb_arg - argument passed to cb_fn
                        */
    8681             : static void
    8682        1591 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8683             : {
    8684        1591 :         struct spdk_bs_cpl      cpl;
    8685             :         spdk_bs_sequence_t      *seq;
    8686             : 
    8687        1591 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8688        1591 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8689        1591 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8690             : 
    8691        1591 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8692        1591 :         if (!seq) {
    8693           0 :                 cb_fn(cb_arg, -ENOMEM);
    8694           0 :                 return;
    8695             :         }
    8696             : 
    8697        1591 :         blob_persist(seq, blob, blob_sync_md_cpl, blob);
    8698             : }
    8699             : 
                       /*
                        * Public entry point for syncing blob metadata.
                        *
                        * When blob->md_ro is set there is nothing to write: the blob is asserted
                        * to be in SPDK_BLOB_STATE_CLEAN and cb_fn(cb_arg, 0) is called before this
                        * function returns. Otherwise the work is handed to blob_sync_md().
                        */
    8700             : void
    8701        1081 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8702             : {
    8703        1081 :         blob_verify_md_op(blob);
    8704             : 
    8705        1081 :         SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
    8706             : 
    8707        1081 :         if (blob->md_ro) {
    8708           4 :                 assert(blob->state == SPDK_BLOB_STATE_CLEAN);
    8709           4 :                 cb_fn(cb_arg, 0);
    8710           4 :                 return;
    8711             :         }
    8712             : 
    8713        1077 :         blob_sync_md(blob, cb_fn, cb_arg);
    8714             : }
    8715             : 
    8716             : /* END spdk_blob_sync_md */
    8717             : 
                       /*
                        * Context for a cluster insert or free request that is posted to the
                        * blobstore md_thread and completed back on the requesting thread.
                        *
                        * It is allocated by blob_insert_cluster_on_md_thread() or
                        * blob_free_cluster_on_md_thread() and freed by blob_op_cluster_msg_cpl().
                        */
    8718             : struct spdk_blob_cluster_op_ctx {
    8719             :         struct spdk_thread      *thread; /* requesting thread; target of the completion message */
    8720             :         struct spdk_blob        *blob;
    8721             :         uint32_t                cluster_num;    /* cluster index in blob */
    8722             :         uint32_t                cluster;        /* cluster on disk */
    8723             :         uint32_t                extent_page;    /* extent page on disk */
    8724             :         struct spdk_blob_md_page *page; /* preallocated extent page */
    8725             :         int                     rc; /* status handed to cb_fn */
    8726             :         spdk_blob_op_complete   cb_fn;
    8727             :         void                    *cb_arg;
    8728             : };
    8729             : 
                       /*
                        * Final step of a cluster operation. It is sent as a message to
                        * ctx->thread, where it reports ctx->rc through the stored callback and
                        * releases the context.
                        */
    8730             : static void
    8731         876 : blob_op_cluster_msg_cpl(void *arg)
    8732             : {
    8733         876 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8734             : 
    8735         876 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    8736         876 :         free(ctx);
    8737         876 : }
    8738             : 
                       /*
                        * Completion used by the cluster operations: stores bserrno in ctx->rc and
                        * posts blob_op_cluster_msg_cpl() to the thread recorded in the context.
                        */
    8739             : static void
    8740         846 : blob_op_cluster_msg_cb(void *arg, int bserrno)
    8741             : {
    8742         846 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8743             : 
    8744         846 :         ctx->rc = bserrno;
    8745         846 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8746         846 : }
    8747             : 
                       /*
                        * Completion passed to blob_write_extent_page() by blob_insert_cluster_msg()
                        * when a newly claimed extent page has been written.
                        *
                        * Records ctx->extent_page in the blob slot returned by
                        * bs_cluster_to_extent_page(), marks the blob dirty and syncs its metadata;
                        * blob_op_cluster_msg_cb() then completes the request.
                        *
                        * NOTE(review): bserrno is not examined, so the extent page is linked and
                        * the metadata synced even if the write reported an error - confirm this
                        * is intended.
                        */
    8748             : static void
    8749          82 : blob_insert_new_ep_cb(void *arg, int bserrno)
    8750             : {
    8751          82 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8752             :         uint32_t *extent_page;
    8753             : 
    8754          82 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8755          82 :         *extent_page = ctx->extent_page;
    8756          82 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8757          82 :         blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8758          82 : }
    8759             : 
                       /*
                        * Context for a single extent page write started by
                        * blob_write_extent_page(). It is freed in blob_persist_extent_page_cpl();
                        * the page buffer itself is not freed there.
                        */
    8760             : struct spdk_blob_write_extent_page_ctx {
    8761             :         struct spdk_blob_store          *bs;
    8762             : 
    8763             :         uint32_t                        extent; /* md page index the page is written to */
    8764             :         struct spdk_blob_md_page        *page; /* buffer holding the serialized extent page */
    8765             : };
    8766             : 
                       /*
                        * Last md-thread step of freeing a cluster when the extent table is used.
                        *
                        * Releases ctx->cluster while holding bs->used_lock, then stores bserrno in
                        * ctx->rc and posts blob_op_cluster_msg_cpl() to the requesting thread.
                        * The cluster is released whatever the value of bserrno.
                        */
    8767             : static void
    8768          26 : blob_free_cluster_msg_cb(void *arg, int bserrno)
    8769             : {
    8770          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8771             : 
    8772          26 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8773          26 :         bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8774          26 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8775             : 
    8776          26 :         ctx->rc = bserrno;
    8777          26 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8778          26 : }
    8779             : 
                       /*
                        * Completion for the extent page rewrite issued by blob_free_cluster_msg()
                        * when the extent page still describes other clusters.
                        *
                        * If the write failed, or bs->clean is 0, the cluster is released right
                        * away through blob_free_cluster_msg_cb(). Otherwise the blob is marked
                        * dirty and its metadata is synced before the cluster is released.
                        */
    8780             : static void
    8781          26 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
    8782             : {
    8783          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8784             : 
    8785          26 :         if (bserrno != 0 || ctx->blob->bs->clean == 0) {
    8786          26 :                 blob_free_cluster_msg_cb(ctx, bserrno);
    8787          26 :                 return;
    8788             :         }
    8789             : 
    8790           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8791           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8792             : }
    8793             : 
                       /*
                        * Completion for the extent page write issued by blob_free_cluster_msg()
                        * when no cluster of that extent page remains allocated.
                        *
                        * Releases the md page ctx->extent_page under bs->used_lock, marks the blob
                        * dirty and syncs its metadata; blob_free_cluster_msg_cb() then releases
                        * the cluster.
                        *
                        * NOTE(review): bserrno is not examined, so the md page is released even if
                        * the write reported an error - confirm this is intended.
                        */
    8794             : static void
    8795           0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
    8796             : {
    8797           0 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8798             : 
    8799           0 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8800           0 :         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8801           0 :         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8802           0 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8803           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8804           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8805           0 : }
    8806             : 
                       /*
                        * Ends an extent page write: frees the write context (but not the page
                        * buffer it points to) and finishes the sequence with bserrno.
                        */
    8807             : static void
    8808         434 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8809             : {
    8810         434 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8811             : 
    8812         434 :         free(ctx);
    8813         434 :         bs_sequence_finish(seq, bserrno);
    8814         434 : }
    8815             : 
                       /*
                        * Callback given to bs_mark_dirty() by blob_write_extent_page().
                        *
                        * On error the write is abandoned through blob_persist_extent_page_cpl().
                        * Otherwise ctx->page is written to the LBA of md page ctx->extent, with a
                        * length of SPDK_BS_PAGE_SIZE bytes converted to LBAs.
                        */
    8816             : static void
    8817         434 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8818             : {
    8819         434 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8820             : 
    8821         434 :         if (bserrno != 0) {
    8822           0 :                 blob_persist_extent_page_cpl(seq, ctx, bserrno);
    8823           0 :                 return;
    8824             :         }
    8825         434 :         bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
    8826         434 :                               bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    8827             :                               blob_persist_extent_page_cpl, ctx);
    8828             : }
    8829             : 
                       /*
                        * Serialize one extent page of blob into page and write it to md page
                        * extent.
                        *
                        * blob        - blob whose extent page is written
                        * extent      - md page index to write to; asserted to be set in used_md_pages
                        * cluster_num - cluster index passed to blob_serialize_extent_page()
                        * page        - caller supplied buffer, asserted non-NULL; it is filled in
                        *               here and is not freed on this path
                        * cb_fn       - called with cb_arg and the result; called directly with
                        *               -ENOMEM if the context or the sequence cannot be allocated
                        *
                        * The write itself is issued from blob_write_extent_page_ready(), after
                        * bs_mark_dirty() has completed.
                        */
    8830             : static void
    8831         434 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
    8832             :                        struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    8833             : {
    8834             :         struct spdk_blob_write_extent_page_ctx  *ctx;
    8835             :         spdk_bs_sequence_t                      *seq;
    8836         434 :         struct spdk_bs_cpl                      cpl;
    8837             : 
    8838         434 :         ctx = calloc(1, sizeof(*ctx));
    8839         434 :         if (!ctx) {
    8840           0 :                 cb_fn(cb_arg, -ENOMEM);
    8841           0 :                 return;
    8842             :         }
    8843         434 :         ctx->bs = blob->bs;
    8844         434 :         ctx->extent = extent;
    8845         434 :         ctx->page = page;
    8846             : 
    8847         434 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8848         434 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8849         434 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8850             : 
    8851         434 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8852         434 :         if (!seq) {
    8853           0 :                 free(ctx);
    8854           0 :                 cb_fn(cb_arg, -ENOMEM);
    8855           0 :                 return;
    8856             :         }
    8857             : 
                               /* Fill in the page header: no next page, owning blob id, sequence number 0. */
    8858         434 :         assert(page);
    8859         434 :         page->next = SPDK_INVALID_MD_PAGE;
    8860         434 :         page->id = blob->id;
    8861         434 :         page->sequence_num = 0;
    8862             : 
    8863         434 :         blob_serialize_extent_page(blob, cluster_num, page);
    8864             : 
                               /* The CRC is computed after the page has been serialized. */
    8865         434 :         page->crc = blob_md_page_calc_crc(page);
    8866             : 
    8867         434 :         assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
    8868             : 
    8869         434 :         bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
    8870             : }
    8871             : 
                       /*
                        * Message handler posted to bs->md_thread by
                        * blob_insert_cluster_on_md_thread().
                        *
                        * Calls blob_insert_cluster() for ctx->cluster_num / ctx->cluster. On
                        * failure the result is sent straight back to the requesting thread. On
                        * success one of three paths persists the change:
                        *  - no extent table: mark the blob dirty and sync its metadata;
                        *  - extent page slot is 0: write the new extent page ctx->extent_page and
                        *    continue in blob_insert_new_ep_cb();
                        *  - extent page slot already set: release a spare ctx->extent_page if one
                        *    was passed in, then rewrite the existing extent page.
                        */
    8872             : static void
    8873         816 : blob_insert_cluster_msg(void *arg)
    8874             : {
    8875         816 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8876             :         uint32_t *extent_page;
    8877             : 
    8878         816 :         ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
    8879         816 :         if (ctx->rc != 0) {
    8880           4 :                 spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8881           4 :                 return;
    8882             :         }
    8883             : 
    8884         812 :         if (ctx->blob->use_extent_table == false) {
    8885             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8886         406 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8887         406 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8888         406 :                 return;
    8889             :         }
    8890             : 
    8891         406 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8892         406 :         if (*extent_page == 0) {
    8893             :                 /* Extent page requires allocation.
    8894             :                  * It was already claimed in the used_md_pages map and placed in ctx. */
    8895          82 :                 assert(ctx->extent_page != 0);
    8896          82 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8897          82 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    8898             :                                        blob_insert_new_ep_cb, ctx);
    8899             :         } else {
    8900             :                 /* It is possible for the original thread to have allocated an extent page
    8901             :                  * for a different cluster covered by the same extent page. In such a case
    8902             :                  * proceed with updating the existing extent page, but release the additional one. */
    8903         324 :                 if (ctx->extent_page != 0) {
    8904           0 :                         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8905           0 :                         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8906           0 :                         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8907           0 :                         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8908           0 :                         ctx->extent_page = 0;
    8909             :                 }
    8910             :                 /* Extent page already allocated.
    8911             :                  * Every cluster allocation requires just an update of a single extent page. */
    8912         324 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    8913             :                                        blob_op_cluster_msg_cb, ctx);
    8914             :         }
    8915             : }
    8916             : 
                       /*
                        * Request insertion of an allocated cluster into blob from any thread.
                        *
                        * A context recording the calling thread (spdk_get_thread()) and all
                        * arguments is allocated and blob_insert_cluster_msg() is posted to
                        * bs->md_thread. cb_fn is later invoked on the calling thread by
                        * blob_op_cluster_msg_cpl(). If the context cannot be allocated,
                        * cb_fn(cb_arg, -ENOMEM) is called directly.
                        *
                        * NOTE(review): cluster is uint64_t here but is stored in the uint32_t
                        * field ctx->cluster, so larger values would be truncated - confirm the
                        * valid range.
                        */
    8917             : static void
    8918         816 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
    8919             :                                  uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
    8920             :                                  spdk_blob_op_complete cb_fn, void *cb_arg)
    8921             : {
    8922             :         struct spdk_blob_cluster_op_ctx *ctx;
    8923             : 
    8924         816 :         ctx = calloc(1, sizeof(*ctx));
    8925         816 :         if (ctx == NULL) {
    8926           0 :                 cb_fn(cb_arg, -ENOMEM);
    8927           0 :                 return;
    8928             :         }
    8929             : 
    8930         816 :         ctx->thread = spdk_get_thread();
    8931         816 :         ctx->blob = blob;
    8932         816 :         ctx->cluster_num = cluster_num;
    8933         816 :         ctx->cluster = cluster;
    8934         816 :         ctx->extent_page = extent_page;
    8935         816 :         ctx->page = page;
    8936         816 :         ctx->cb_fn = cb_fn;
    8937         816 :         ctx->cb_arg = cb_arg;
    8938             : 
    8939         816 :         spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
    8940             : }
    8941             : 
                       /*
                        * Message handler posted to bs->md_thread by
                        * blob_free_cluster_on_md_thread().
                        *
                        * Looks up the on-disk cluster behind ctx->cluster_num, clears the entry in
                        * active.clusters and decrements num_allocated_clusters. Then:
                        *  - no extent table: release the cluster under used_lock, mark the blob
                        *    dirty and sync its metadata;
                        *  - extent table, other clusters of the same extent page still set:
                        *    rewrite the extent page, continue in blob_free_cluster_update_ep_cb();
                        *  - extent table, no cluster left in the extent page: clear the extent
                        *    page slot, write the page, continue in blob_free_cluster_free_ep_cb().
                        * In both extent table cases the cluster is released later, in
                        * blob_free_cluster_msg_cb().
                        */
    8942             : static void
    8943          60 : blob_free_cluster_msg(void *arg)
    8944             : {
    8945          60 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8946             :         uint32_t *extent_page;
    8947             :         uint32_t start_cluster_idx;
    8948          60 :         bool free_extent_page = true;
    8949             :         size_t i;
    8950             : 
    8951          60 :         ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
    8952             : 
    8953             :         /* There were concurrent unmaps to the same cluster, only release the cluster on the first one */
    8954          60 :         if (ctx->cluster == 0) {
    8955           8 :                 blob_op_cluster_msg_cb(ctx, 0);
    8956           8 :                 return;
    8957             :         }
    8958             : 
    8959          52 :         ctx->blob->active.clusters[ctx->cluster_num] = 0;
                               /* NOTE(review): always true here, the zero case returned above. */
    8960          52 :         if (ctx->cluster != 0) {
    8961          52 :                 ctx->blob->active.num_allocated_clusters--;
    8962             :         }
    8963             : 
    8964          52 :         if (ctx->blob->use_extent_table == false) {
    8965             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8966          26 :                 spdk_spin_lock(&ctx->blob->bs->used_lock);
    8967          26 :                 bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8968          26 :                 spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8969          26 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8970          26 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8971          26 :                 return;
    8972             :         }
    8973             : 
    8974          26 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8975             : 
    8976             :         /* There shouldn't be parallel release operations on same cluster */
    8977          26 :         assert(*extent_page == ctx->extent_page);
    8978             : 
                               /* Scan every cluster slot covered by this extent page; any non-zero
                                * entry means the page is still needed.
                                * NOTE(review): the scan always covers SPDK_EXTENTS_PER_EP entries;
                                * presumably active.clusters is sized to allow that - confirm. */
    8979          26 :         start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    8980          48 :         for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
    8981          48 :                 if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
    8982          26 :                         free_extent_page = false;
    8983          26 :                         break;
    8984             :                 }
    8985             :         }
    8986             : 
    8987          26 :         if (free_extent_page) {
    8988           0 :                 assert(ctx->extent_page != 0);
    8989           0 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8990           0 :                 ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
    8991           0 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    8992             :                                        blob_free_cluster_free_ep_cb, ctx);
    8993             :         } else {
    8994          26 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    8995             :                                        blob_free_cluster_update_ep_cb, ctx);
    8996             :         }
    8997             : }
    8998             : 
    8999             : 
                       /*
                        * Request release of cluster cluster_num of blob from any thread.
                        *
                        * A context recording the calling thread and the arguments is allocated
                        * and blob_free_cluster_msg() is posted to bs->md_thread. ctx->cluster is
                        * left zeroed by calloc() and is filled in on the md thread. cb_fn is
                        * later invoked on the calling thread; if the context cannot be allocated,
                        * cb_fn(cb_arg, -ENOMEM) is called directly.
                        */
    9000             : static void
    9001          60 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
    9002             :                                struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    9003             : {
    9004             :         struct spdk_blob_cluster_op_ctx *ctx;
    9005             : 
    9006          60 :         ctx = calloc(1, sizeof(*ctx));
    9007          60 :         if (ctx == NULL) {
    9008           0 :                 cb_fn(cb_arg, -ENOMEM);
    9009           0 :                 return;
    9010             :         }
    9011             : 
    9012          60 :         ctx->thread = spdk_get_thread();
    9013          60 :         ctx->blob = blob;
    9014          60 :         ctx->cluster_num = cluster_num;
    9015          60 :         ctx->extent_page = extent_page;
    9016          60 :         ctx->page = page;
    9017          60 :         ctx->cb_fn = cb_fn;
    9018          60 :         ctx->cb_arg = cb_arg;
    9019             : 
    9020          60 :         spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
    9021             : }
    9022             : 
    9023             : /* START spdk_blob_close */
    9024             : 
                       /*
                        * Completion for the metadata persist done while closing a blob.
                        *
                        * On success the open reference count is dropped; when it reaches zero the
                        * blob is removed from the open-blob bookkeeping (unless it is a deleted
                        * blob, see below) and freed. On failure the reference count is left
                        * unchanged. The sequence is finished with bserrno in both cases.
                        */
    9025             : static void
    9026        4151 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9027             : {
    9028        4151 :         struct spdk_blob *blob = cb_arg;
    9029             : 
    9030        4151 :         if (bserrno == 0) {
    9031        4151 :                 blob->open_ref--;
    9032        4151 :                 if (blob->open_ref == 0) {
    9033             :                         /*
    9034             :                          * Blobs with active.num_pages == 0 are deleted blobs.
    9035             :                          *  these blobs are removed from the blob_store list
    9036             :                          *  when the deletion process starts - so don't try to
    9037             :                          *  remove them again.
    9038             :                          */
    9039        3398 :                         if (blob->active.num_pages > 0) {
    9040        1910 :                                 spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    9041        1910 :                                 RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    9042             :                         }
    9043        3398 :                         blob_free(blob);
    9044             :                 }
    9045             :         }
    9046             : 
    9047        4151 :         bs_sequence_finish(seq, bserrno);
    9048        4151 : }
    9049             : 
                       /*
                        * Callback passed to blob_esnap_destroy_bs_dev_channels() by
                        * spdk_blob_close(); cb_arg is the close sequence.
                        *
                        * On error the sequence is finished with bserrno and no metadata is
                        * persisted. Otherwise the close continues with blob_persist() and
                        * blob_close_cpl().
                        */
    9050             : static void
    9051         112 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
    9052             : {
    9053         112 :         spdk_bs_sequence_t      *seq = cb_arg;
    9054             : 
    9055         112 :         if (bserrno != 0) {
    9056           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
    9057             :                               blob->id, bserrno);
    9058           0 :                 bs_sequence_finish(seq, bserrno);
    9059           0 :                 return;
    9060             :         }
    9061             : 
    9062         112 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
    9063             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
    9064             : 
    9065             :         /* Sync metadata */
    9066         112 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9067             : }
    9068             : 
                       /*
                        * Close an open blob, persisting its metadata first.
                        *
                        * cb_fn(cb_arg, -EBADF) is called directly if the blob has no open
                        * reference, and cb_fn(cb_arg, -ENOMEM) if no sequence can be started.
                        * When this is the last reference to an esnap clone, its bs_dev channels
                        * are destroyed first and the persist is continued from
                        * blob_close_esnap_done(). The reference count itself is dropped in
                        * blob_close_cpl().
                        */
    9069             : void
    9070        4151 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    9071             : {
    9072        4151 :         struct spdk_bs_cpl      cpl;
    9073             :         spdk_bs_sequence_t      *seq;
    9074             : 
    9075        4151 :         blob_verify_md_op(blob);
    9076             : 
    9077        4151 :         SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
    9078             : 
    9079        4151 :         if (blob->open_ref == 0) {
    9080           0 :                 cb_fn(cb_arg, -EBADF);
    9081           0 :                 return;
    9082             :         }
    9083             : 
    9084        4151 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    9085        4151 :         cpl.u.blob_basic.cb_fn = cb_fn;
    9086        4151 :         cpl.u.blob_basic.cb_arg = cb_arg;
    9087             : 
    9088        4151 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    9089        4151 :         if (!seq) {
    9090           0 :                 cb_fn(cb_arg, -ENOMEM);
    9091           0 :                 return;
    9092             :         }
    9093             : 
    9094        4151 :         if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
    9095         112 :                 blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
    9096         112 :                 return;
    9097             :         }
    9098             : 
    9099             :         /* Sync metadata */
    9100        4039 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9101             : }
    9102             : 
    9103             : /* END spdk_blob_close */
    9104             : 
                       /* Get an I/O channel for the blobstore; returns the result of spdk_get_io_channel(bs). */
    9105         229 : struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
    9106             : {
    9107         229 :         return spdk_get_io_channel(bs);
    9108             : }
    9109             : 
                       /*
                        * Release a blobstore I/O channel. blob_esnap_destroy_bs_channel() is run
                        * on the channel context before the channel reference is put.
                        */
    9110             : void
    9111         229 : spdk_bs_free_io_channel(struct spdk_io_channel *channel)
    9112             : {
    9113         229 :         blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
    9114         229 :         spdk_put_io_channel(channel);
    9115         229 : }
    9116             : 
                       /*
                        * Submit an unmap for the given offset/length of blob. Forwards to
                        * blob_request_submit_op() with a NULL payload and SPDK_BLOB_UNMAP.
                        */
    9117             : void
    9118         108 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9119             :                    uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9120             : {
    9121         108 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9122             :                                SPDK_BLOB_UNMAP);
    9123         108 : }
    9124             : 
                       /*
                        * Submit a write-zeroes for the given offset/length of blob. Forwards to
                        * blob_request_submit_op() with a NULL payload and SPDK_BLOB_WRITE_ZEROES.
                        */
    9125             : void
    9126          48 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9127             :                           uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9128             : {
    9129          48 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9130             :                                SPDK_BLOB_WRITE_ZEROES);
    9131          48 : }
    9132             : 
                       /*
                        * Submit a write of payload to blob at offset/length. Forwards to
                        * blob_request_submit_op() with SPDK_BLOB_WRITE.
                        */
    9133             : void
    9134       20868 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9135             :                    void *payload, uint64_t offset, uint64_t length,
    9136             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9137             : {
    9138       20868 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9139             :                                SPDK_BLOB_WRITE);
    9140       20868 : }
    9141             : 
                       /*
                        * Submit a read from blob at offset/length into payload. Forwards to
                        * blob_request_submit_op() with SPDK_BLOB_READ.
                        */
    9142             : void
    9143       17500 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9144             :                   void *payload, uint64_t offset, uint64_t length,
    9145             :                   spdk_blob_op_complete cb_fn, void *cb_arg)
    9146             : {
    9147       17500 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9148             :                                SPDK_BLOB_READ);
    9149       17500 : }
    9150             : 
                       /*
                        * Vectored write. Forwards to blob_request_submit_rw_iov() with the read
                        * flag set to false and no extended I/O options.
                        */
    9151             : void
    9152         140 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9153             :                     struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9154             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    9155             : {
    9156         140 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
    9157         140 : }
    9158             : 
                       /*
                        * Vectored read. Forwards to blob_request_submit_rw_iov() with the read
                        * flag set to true and no extended I/O options.
                        */
    9159             : void
    9160         940 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9161             :                    struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9162             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9163             : {
    9164         940 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
    9165         940 : }
    9166             : 
                       /*
                        * Vectored write with caller supplied extended options. Forwards to
                        * blob_request_submit_rw_iov() with the read flag set to false and io_opts
                        * passed through unchanged.
                        */
    9167             : void
    9168         208 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9169             :                         struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9170             :                         spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9171             : {
    9172         208 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
    9173             :                                    io_opts);
    9174         208 : }
    9175             : 
                       /*
                        * Vectored read with caller supplied extended options. Forwards to
                        * blob_request_submit_rw_iov() with the read flag set to true and io_opts
                        * passed through unchanged.
                        */
    9176             : void
    9177        1300 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9178             :                        struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9179             :                        spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9180             : {
    9181        1300 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
    9182             :                                    io_opts);
    9183        1300 : }
    9184             : 
                       /*
                        * State of one blob iteration step (spdk_bs_iter_first / spdk_bs_iter_next).
                        * Allocated per call and freed in bs_iter_cpl() when the user callback is
                        * invoked.
                        */
    9185             : struct spdk_bs_iter_ctx {
    9186             :         int64_t page_num; /* last page index examined; -1 before the first search */
    9187             :         struct spdk_blob_store *bs;
    9188             : 
    9189             :         spdk_blob_op_with_handle_complete cb_fn;
    9190             :         void *cb_arg;
    9191             : };
    9192             : 
                       /*
                        * Iteration driver, also used as the completion of spdk_bs_open_blob().
                        *
                        * bserrno == 0 means a blob was opened: it is handed to the user callback
                        * and the context is freed. Any other value (including the -1 passed by
                        * the iteration entry points to start a search) advances to the next set
                        * bit in bs->used_blobids after ctx->page_num and tries to open that blob.
                        * When the search result is not below the bit array capacity the iteration
                        * ends with cb_fn(cb_arg, NULL, -ENOENT).
                        */
    9193             : static void
    9194        1164 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    9195             : {
    9196        1164 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9197        1164 :         struct spdk_blob_store *bs = ctx->bs;
    9198             :         spdk_blob_id id;
    9199             : 
    9200        1164 :         if (bserrno == 0) {
    9201         444 :                 ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
    9202         444 :                 free(ctx);
    9203         444 :                 return;
    9204             :         }
    9205             : 
                               /* A failed open is not reported; the search simply moves on. */
    9206         720 :         ctx->page_num++;
    9207         720 :         ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
    9208         720 :         if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
    9209         268 :                 ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
    9210         268 :                 free(ctx);
    9211         268 :                 return;
    9212             :         }
    9213             : 
    9214         452 :         id = bs_page_to_blobid(ctx->page_num);
    9215             : 
    9216         452 :         spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
    9217             : }
    9218             : 
                       /*
                        * Start iterating over the blobs of bs.
                        *
                        * Allocates an iteration context with page_num = -1 and enters
                        * bs_iter_cpl() with a non-zero status so that it searches from index 0.
                        * cb_fn receives the first blob that opens successfully, or NULL with
                        * -ENOENT when there is none; cb_fn(cb_arg, NULL, -ENOMEM) is called
                        * directly if the context cannot be allocated.
                        */
    9219             : void
    9220         292 : spdk_bs_iter_first(struct spdk_blob_store *bs,
    9221             :                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9222             : {
    9223             :         struct spdk_bs_iter_ctx *ctx;
    9224             : 
    9225         292 :         ctx = calloc(1, sizeof(*ctx));
    9226         292 :         if (!ctx) {
    9227           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9228           0 :                 return;
    9229             :         }
    9230             : 
    9231         292 :         ctx->page_num = -1;
    9232         292 :         ctx->bs = bs;
    9233         292 :         ctx->cb_fn = cb_fn;
    9234         292 :         ctx->cb_arg = cb_arg;
    9235             : 
    9236         292 :         bs_iter_cpl(ctx, NULL, -1);
    9237             : }
    9238             : 
                       /*
                        * Completion of the spdk_blob_close() issued by spdk_bs_iter_next();
                        * resumes the search through bs_iter_cpl().
                        *
                        * NOTE(review): bserrno from the close is ignored; the iteration continues
                        * either way - confirm this is intended.
                        */
    9239             : static void
    9240         420 : bs_iter_close_cpl(void *cb_arg, int bserrno)
    9241             : {
    9242         420 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9243             : 
    9244         420 :         bs_iter_cpl(ctx, NULL, -1);
    9245         420 : }
    9246             : 
                       /*
                        * Advance the iteration past blob.
                        *
                        * blob must not be NULL (asserted). Its page index becomes the starting
                        * point of the search, and the blob is closed here; the next blob is
                        * looked up once the close has completed. cb_fn(cb_arg, NULL, -ENOMEM) is
                        * called directly if the context cannot be allocated, in which case the
                        * blob is not closed.
                        */
    9247             : void
    9248         420 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
    9249             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9250             : {
    9251             :         struct spdk_bs_iter_ctx *ctx;
    9252             : 
    9253         420 :         assert(blob != NULL);
    9254             : 
    9255         420 :         ctx = calloc(1, sizeof(*ctx));
    9256         420 :         if (!ctx) {
    9257           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9258           0 :                 return;
    9259             :         }
    9260             : 
    9261         420 :         ctx->page_num = bs_blobid_to_page(blob->id);
    9262         420 :         ctx->bs = bs;
    9263         420 :         ctx->cb_fn = cb_fn;
    9264         420 :         ctx->cb_arg = cb_arg;
    9265             : 
    9266             :         /* Close the existing blob */
    9267         420 :         spdk_blob_close(blob, bs_iter_close_cpl, ctx);
    9268             : }
    9269             : 
    9270             : static int
    9271         943 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9272             :                uint16_t value_len, bool internal)
    9273             : {
    9274             :         struct spdk_xattr_tailq *xattrs;
    9275             :         struct spdk_xattr       *xattr;
    9276             :         size_t                  desc_size;
    9277             :         void                    *tmp;
    9278             : 
    9279         943 :         blob_verify_md_op(blob);
    9280             : 
    9281         943 :         if (blob->md_ro) {
    9282           4 :                 return -EPERM;
    9283             :         }
    9284             : 
    9285         939 :         desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
    9286         939 :         if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
    9287           4 :                 SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
    9288             :                               desc_size, SPDK_BS_MAX_DESC_SIZE);
    9289           4 :                 return -ENOMEM;
    9290             :         }
    9291             : 
    9292         935 :         if (internal) {
    9293         724 :                 xattrs = &blob->xattrs_internal;
    9294         724 :                 blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
    9295             :         } else {
    9296         211 :                 xattrs = &blob->xattrs;
    9297             :         }
    9298             : 
    9299        1158 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9300         332 :                 if (!strcmp(name, xattr->name)) {
    9301         109 :                         tmp = malloc(value_len);
    9302         109 :                         if (!tmp) {
    9303           0 :                                 return -ENOMEM;
    9304             :                         }
    9305             : 
    9306         109 :                         free(xattr->value);
    9307         109 :                         xattr->value_len = value_len;
    9308         109 :                         xattr->value = tmp;
    9309         109 :                         memcpy(xattr->value, value, value_len);
    9310             : 
    9311         109 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9312             : 
    9313         109 :                         return 0;
    9314             :                 }
    9315             :         }
    9316             : 
    9317         826 :         xattr = calloc(1, sizeof(*xattr));
    9318         826 :         if (!xattr) {
    9319           0 :                 return -ENOMEM;
    9320             :         }
    9321             : 
    9322         826 :         xattr->name = strdup(name);
    9323         826 :         if (!xattr->name) {
    9324           0 :                 free(xattr);
    9325           0 :                 return -ENOMEM;
    9326             :         }
    9327             : 
    9328         826 :         xattr->value_len = value_len;
    9329         826 :         xattr->value = malloc(value_len);
    9330         826 :         if (!xattr->value) {
    9331           0 :                 free(xattr->name);
    9332           0 :                 free(xattr);
    9333           0 :                 return -ENOMEM;
    9334             :         }
    9335         826 :         memcpy(xattr->value, value, value_len);
    9336         826 :         TAILQ_INSERT_TAIL(xattrs, xattr, link);
    9337             : 
    9338         826 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    9339             : 
    9340         826 :         return 0;
    9341             : }
    9342             : 
    9343             : int
    9344         183 : spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9345             :                     uint16_t value_len)
    9346             : {
    9347         183 :         return blob_set_xattr(blob, name, value, value_len, false);
    9348             : }
    9349             : 
    9350             : static int
    9351         404 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
    9352             : {
    9353             :         struct spdk_xattr_tailq *xattrs;
    9354             :         struct spdk_xattr       *xattr;
    9355             : 
    9356         404 :         blob_verify_md_op(blob);
    9357             : 
    9358         404 :         if (blob->md_ro) {
    9359           4 :                 return -EPERM;
    9360             :         }
    9361         400 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9362             : 
    9363         412 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9364         360 :                 if (!strcmp(name, xattr->name)) {
    9365         348 :                         TAILQ_REMOVE(xattrs, xattr, link);
    9366         348 :                         free(xattr->value);
    9367         348 :                         free(xattr->name);
    9368         348 :                         free(xattr);
    9369             : 
    9370         348 :                         if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
    9371         240 :                                 blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
    9372             :                         }
    9373         348 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9374             : 
    9375         348 :                         return 0;
    9376             :                 }
    9377             :         }
    9378             : 
    9379          52 :         return -ENOENT;
    9380             : }
    9381             : 
    9382             : int
    9383          36 : spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
    9384             : {
    9385          36 :         return blob_remove_xattr(blob, name, false);
    9386             : }
    9387             : 
    9388             : static int
    9389        2268 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9390             :                      const void **value, size_t *value_len, bool internal)
    9391             : {
    9392             :         struct spdk_xattr       *xattr;
    9393             :         struct spdk_xattr_tailq *xattrs;
    9394             : 
    9395        2268 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9396             : 
    9397        2890 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9398        1372 :                 if (!strcmp(name, xattr->name)) {
    9399         750 :                         *value = xattr->value;
    9400         750 :                         *value_len = xattr->value_len;
    9401         750 :                         return 0;
    9402             :                 }
    9403             :         }
    9404        1518 :         return -ENOENT;
    9405             : }
    9406             : 
    9407             : int
    9408         154 : spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9409             :                           const void **value, size_t *value_len)
    9410             : {
    9411         154 :         blob_verify_md_op(blob);
    9412             : 
    9413         154 :         return blob_get_xattr_value(blob, name, value, value_len, false);
    9414             : }
    9415             : 
    9416             : struct spdk_xattr_names {
    9417             :         uint32_t        count;
    9418             :         const char      *names[0];
    9419             : };
    9420             : 
    9421             : static int
    9422           4 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
    9423             : {
    9424             :         struct spdk_xattr       *xattr;
    9425           4 :         int                     count = 0;
    9426             : 
    9427          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9428           8 :                 count++;
    9429             :         }
    9430             : 
    9431           4 :         *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
    9432           4 :         if (*names == NULL) {
    9433           0 :                 return -ENOMEM;
    9434             :         }
    9435             : 
    9436          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9437           8 :                 (*names)->names[(*names)->count++] = xattr->name;
    9438             :         }
    9439             : 
    9440           4 :         return 0;
    9441             : }
    9442             : 
    9443             : int
    9444           4 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
    9445             : {
    9446           4 :         blob_verify_md_op(blob);
    9447             : 
    9448           4 :         return blob_get_xattr_names(&blob->xattrs, names);
    9449             : }
    9450             : 
    9451             : uint32_t
    9452           4 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
    9453             : {
    9454           4 :         assert(names != NULL);
    9455             : 
    9456           4 :         return names->count;
    9457             : }
    9458             : 
    9459             : const char *
    9460           8 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
    9461             : {
    9462           8 :         if (index >= names->count) {
    9463           0 :                 return NULL;
    9464             :         }
    9465             : 
    9466           8 :         return names->names[index];
    9467             : }
    9468             : 
    9469             : void
    9470           4 : spdk_xattr_names_free(struct spdk_xattr_names *names)
    9471             : {
    9472           4 :         free(names);
    9473           4 : }
    9474             : 
    9475             : struct spdk_bs_type
    9476           2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
    9477             : {
    9478           2 :         return bs->bstype;
    9479             : }
    9480             : 
    9481             : void
    9482           0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
    9483             : {
    9484           0 :         memcpy(&bs->bstype, &bstype, sizeof(bstype));
    9485           0 : }
    9486             : 
    9487             : bool
    9488          48 : spdk_blob_is_read_only(struct spdk_blob *blob)
    9489             : {
    9490          48 :         assert(blob != NULL);
    9491          48 :         return (blob->data_ro || blob->md_ro);
    9492             : }
    9493             : 
    9494             : bool
    9495          52 : spdk_blob_is_snapshot(struct spdk_blob *blob)
    9496             : {
    9497             :         struct spdk_blob_list *snapshot_entry;
    9498             : 
    9499          52 :         assert(blob != NULL);
    9500             : 
    9501          52 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    9502          52 :         if (snapshot_entry == NULL) {
    9503          28 :                 return false;
    9504             :         }
    9505             : 
    9506          24 :         return true;
    9507             : }
    9508             : 
    9509             : bool
    9510          68 : spdk_blob_is_clone(struct spdk_blob *blob)
    9511             : {
    9512          68 :         assert(blob != NULL);
    9513             : 
    9514          68 :         if (blob->parent_id != SPDK_BLOBID_INVALID &&
    9515          44 :             blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    9516          40 :                 assert(spdk_blob_is_thin_provisioned(blob));
    9517          40 :                 return true;
    9518             :         }
    9519             : 
    9520          28 :         return false;
    9521             : }
    9522             : 
    9523             : bool
    9524       36462 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
    9525             : {
    9526       36462 :         assert(blob != NULL);
    9527       36462 :         return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
    9528             : }
    9529             : 
    9530             : bool
    9531       40888 : spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
    9532             : {
    9533       40888 :         return blob_is_esnap_clone(blob);
    9534             : }
    9535             : 
    9536             : static void
    9537        3426 : blob_update_clear_method(struct spdk_blob *blob)
    9538             : {
    9539             :         enum blob_clear_method stored_cm;
    9540             : 
    9541        3426 :         assert(blob != NULL);
    9542             : 
    9543             :         /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
    9544             :          * in metadata previously.  If something other than the default was
    9545             :          * specified, ignore stored value and used what was passed in.
    9546             :          */
    9547        3426 :         stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
    9548             : 
    9549        3426 :         if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
    9550        3426 :                 blob->clear_method = stored_cm;
    9551           0 :         } else if (blob->clear_method != stored_cm) {
    9552           0 :                 SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
    9553             :                              blob->clear_method, stored_cm);
    9554             :         }
    9555        3426 : }
    9556             : 
    9557             : spdk_blob_id
    9558         258 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
    9559             : {
    9560         258 :         struct spdk_blob_list *snapshot_entry = NULL;
    9561         258 :         struct spdk_blob_list *clone_entry = NULL;
    9562             : 
    9563         494 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
    9564         732 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9565         496 :                         if (clone_entry->id == blob_id) {
    9566         168 :                                 return snapshot_entry->id;
    9567             :                         }
    9568             :                 }
    9569             :         }
    9570             : 
    9571          90 :         return SPDK_BLOBID_INVALID;
    9572             : }
    9573             : 
    9574             : int
    9575         196 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
    9576             :                      size_t *count)
    9577             : {
    9578             :         struct spdk_blob_list *snapshot_entry, *clone_entry;
    9579             :         size_t n;
    9580             : 
    9581         196 :         snapshot_entry = bs_get_snapshot_entry(bs, blobid);
    9582         196 :         if (snapshot_entry == NULL) {
    9583          28 :                 *count = 0;
    9584          28 :                 return 0;
    9585             :         }
    9586             : 
    9587         168 :         if (ids == NULL || *count < snapshot_entry->clone_count) {
    9588           8 :                 *count = snapshot_entry->clone_count;
    9589           8 :                 return -ENOMEM;
    9590             :         }
    9591         160 :         *count = snapshot_entry->clone_count;
    9592             : 
    9593         160 :         n = 0;
    9594         340 :         TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9595         180 :                 ids[n++] = clone_entry->id;
    9596             :         }
    9597             : 
    9598         160 :         return 0;
    9599             : }
    9600             : 
    9601             : static void
    9602           4 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
    9603             : {
    9604             :         int rc;
    9605             : 
    9606           4 :         if (ctx->super->size == 0) {
    9607           0 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9608             :         }
    9609             : 
    9610           4 :         if (ctx->super->io_unit_size == 0) {
    9611           0 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    9612             :         }
    9613             : 
    9614             :         /* Parse the super block */
    9615           4 :         ctx->bs->clean = 1;
    9616           4 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    9617           4 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    9618           4 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    9619           4 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    9620           4 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    9621             :         }
    9622           4 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    9623           4 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    9624           4 :         if (rc < 0) {
    9625           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9626           0 :                 return;
    9627             :         }
    9628           4 :         ctx->bs->md_start = ctx->super->md_start;
    9629           4 :         ctx->bs->md_len = ctx->super->md_len;
    9630           4 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    9631           4 :         if (rc < 0) {
    9632           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9633           0 :                 return;
    9634             :         }
    9635             : 
    9636           8 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    9637           4 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    9638           4 :         ctx->bs->super_blob = ctx->super->super_blob;
    9639           4 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    9640             : 
    9641           4 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
    9642           0 :                 SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
    9643           0 :                 bs_load_ctx_fail(ctx, -EIO);
    9644           0 :                 return;
    9645             :         } else {
    9646           4 :                 bs_load_read_used_pages(ctx);
    9647             :         }
    9648             : }
    9649             : 
    9650             : static void
    9651           4 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9652             : {
    9653           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9654             : 
    9655           4 :         if (bserrno != 0) {
    9656           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9657           0 :                 return;
    9658             :         }
    9659           4 :         bs_load_grow_continue(ctx);
    9660             : }
    9661             : 
    9662             : static void
    9663           4 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9664             : {
    9665           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9666             : 
    9667           4 :         if (bserrno != 0) {
    9668           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9669           0 :                 return;
    9670             :         }
    9671             : 
    9672           4 :         spdk_free(ctx->mask);
    9673             : 
    9674           4 :         bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    9675           4 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    9676             :                               bs_load_grow_super_write_cpl, ctx);
    9677             : }
    9678             : 
    9679             : static void
    9680           4 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9681             : {
    9682           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9683             :         uint64_t                lba, lba_count;
    9684             :         uint64_t                dev_size;
    9685             :         uint64_t                total_clusters;
    9686             : 
    9687           4 :         if (bserrno != 0) {
    9688           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9689           0 :                 return;
    9690             :         }
    9691             : 
    9692             :         /* The type must be correct */
    9693           4 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    9694             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    9695           4 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    9696             :                                              struct spdk_blob_md_page) * 8));
    9697           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9698           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9699           4 :         ctx->mask->length = total_clusters;
    9700             : 
    9701           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9702           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9703           4 :         bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
    9704             :                               bs_load_grow_used_clusters_write_cpl, ctx);
    9705             : }
    9706             : 
    9707             : static void
    9708           4 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
    9709             : {
    9710             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9711             :         uint64_t lba, lba_count, mask_size;
    9712             : 
    9713           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9714           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9715           4 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9716           4 :                                 spdk_divide_round_up(total_clusters, 8),
    9717             :                                 SPDK_BS_PAGE_SIZE);
    9718           4 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9719             :         /* No necessary to grow or no space to grow */
    9720           4 :         if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
    9721           0 :                 SPDK_DEBUGLOG(blob, "No grow\n");
    9722           0 :                 bs_load_grow_continue(ctx);
    9723           0 :                 return;
    9724             :         }
    9725             : 
    9726           4 :         SPDK_DEBUGLOG(blob, "Resize blobstore\n");
    9727             : 
    9728           4 :         ctx->super->size = dev_size;
    9729           4 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9730           4 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    9731             : 
    9732           4 :         mask_size = used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    9733           4 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    9734             :                                  SPDK_MALLOC_DMA);
    9735           4 :         if (!ctx->mask) {
    9736           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9737           0 :                 return;
    9738             :         }
    9739           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9740           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9741           4 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    9742             :                              bs_load_grow_used_clusters_read_cpl, ctx);
    9743             : }
    9744             : 
    9745             : static void
    9746           4 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9747             : {
    9748           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9749             :         int rc;
    9750             : 
    9751           4 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9752           4 :         if (rc != 0) {
    9753           0 :                 bs_load_ctx_fail(ctx, rc);
    9754           0 :                 return;
    9755             :         }
    9756             : 
    9757           4 :         bs_load_try_to_grow(ctx);
    9758             : }
    9759             : 
    9760             : struct spdk_bs_grow_ctx {
    9761             :         struct spdk_blob_store          *bs;
    9762             :         struct spdk_bs_super_block      *super;
    9763             : 
    9764             :         struct spdk_bit_pool            *new_used_clusters;
    9765             :         struct spdk_bs_md_mask          *new_used_clusters_mask;
    9766             : 
    9767             :         spdk_bs_sequence_t              *seq;
    9768             : };
    9769             : 
    9770             : static void
    9771          32 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
    9772             : {
    9773          32 :         if (bserrno != 0) {
    9774           8 :                 spdk_bit_pool_free(&ctx->new_used_clusters);
    9775             :         }
    9776             : 
    9777          32 :         bs_sequence_finish(ctx->seq, bserrno);
    9778          32 :         free(ctx->new_used_clusters_mask);
    9779          32 :         spdk_free(ctx->super);
    9780          32 :         free(ctx);
    9781          32 : }
    9782             : 
    9783             : static void
    9784           8 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9785             : {
    9786           8 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9787           8 :         struct spdk_blob_store *bs = ctx->bs;
    9788             :         uint64_t total_clusters;
    9789             : 
    9790           8 :         if (bserrno != 0) {
    9791           0 :                 bs_grow_live_done(ctx, bserrno);
    9792           0 :                 return;
    9793             :         }
    9794             : 
    9795             :         /*
    9796             :          * Blobstore is not clean until unload, for now only the super block is up to date.
    9797             :          * This is similar to state right after blobstore init, when bs_write_used_md() didn't
    9798             :          * yet execute.
    9799             :          * When cleanly unloaded, the used md pages will be written out.
    9800             :          * In case of unclean shutdown, loading blobstore will go through recovery path correctly
    9801             :          * filling out the used_clusters with new size and writing it out.
    9802             :          */
    9803           8 :         bs->clean = 0;
    9804             : 
    9805             :         /* Reverting the super->size past this point is complex, avoid any error paths
    9806             :          * that require to do so. */
    9807           8 :         spdk_spin_lock(&bs->used_lock);
    9808             : 
    9809           8 :         total_clusters = ctx->super->size / ctx->super->cluster_size;
    9810             : 
    9811           8 :         assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
    9812           8 :         spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
    9813             : 
    9814           8 :         assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
    9815           8 :         spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
    9816             : 
    9817           8 :         spdk_bit_pool_free(&bs->used_clusters);
    9818           8 :         bs->used_clusters = ctx->new_used_clusters;
    9819             : 
    9820           8 :         bs->total_clusters = total_clusters;
    9821          16 :         bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
    9822           8 :                                           bs->md_start + bs->md_len, bs->pages_per_cluster);
    9823             : 
    9824           8 :         bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
    9825           8 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    9826           8 :         spdk_spin_unlock(&bs->used_lock);
    9827             : 
    9828           8 :         bs_grow_live_done(ctx, 0);
    9829             : }
    9830             : 
    /*
     * Sequence callback invoked once the super block read issued by spdk_bs_grow_live() has
     * completed.
     *
     * Validates the super block with bs_super_validate(), derives the cluster count and the
     * used-cluster mask length from the current device size, and then either:
     *   - finishes with 0 when the device size equals super->size,
     *   - finishes with -ENOSPC when the blobstore would shrink or the mask would not fit, or
     *   - allocates the new mask buffer and bit pool, updates the in-memory super block and
     *     writes it out, continuing in bs_grow_live_super_write_cpl().
     *
     * seq:     sequence passed on to bs_write_super()
     * cb_arg:  the struct spdk_bs_grow_ctx of this grow operation
     * bserrno: status of the super block read; non-zero is forwarded to bs_grow_live_done()
     */
    9831             : static void
    9832          32 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9833             : {
    9834          32 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9835             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9836             :         int rc;
    9837             : 
    9838          32 :         if (bserrno != 0) {
    9839           0 :                 bs_grow_live_done(ctx, bserrno);
    9840           0 :                 return;
    9841             :         }
    9842             : 
    9843          32 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9844          32 :         if (rc != 0) {
    9845           4 :                 bs_grow_live_done(ctx, rc);
    9846           4 :                 return;
    9847             :         }
    9848             : 
                               /*
                                * Mask length is the mask header plus one bit per cluster, rounded up to
                                * whole SPDK_BS_PAGE_SIZE units.
                                */
    9849          28 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9850          28 :         total_clusters = dev_size / ctx->super->cluster_size;
    9851          28 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9852          28 :                                 spdk_divide_round_up(total_clusters, 8),
    9853             :                                 SPDK_BS_PAGE_SIZE);
                               /* Room for the mask: the gap between the two mask start offsets in the super block. */
    9854          28 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9855             :         /* Only dev_size is compared: it can change while total_clusters stays the same. */
    9856          28 :         if (dev_size == ctx->super->size) {
    9857          16 :                 SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
    9858          16 :                 bs_grow_live_done(ctx, 0);
    9859          16 :                 return;
    9860             :         }
    9861             :         /*
    9862             :          * Blobstore cannot be shrunk, so check before if:
    9863             :          * - new size of the device is smaller than size in super_block
    9864             :          * - new total number of clusters is smaller than used_clusters bit_pool
    9865             :          * - there is enough space in metadata for used_cluster_mask to be written out
    9866             :          */
    9867          12 :         if (dev_size < ctx->super->size ||
    9868          12 :             total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
    9869             :             used_cluster_mask_len > max_used_cluster_mask) {
    9870           4 :                 SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
    9871           4 :                 bs_grow_live_done(ctx, -ENOSPC);
    9872           4 :                 return;
    9873             :         }
    9874             : 
    9875           8 :         SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
    9876             : 
                               /*
                                * NOTE(review): the buffer is sized at one byte per cluster, whereas the
                                * mask length above is computed at one bit per cluster plus a header.
                                * Presumably a deliberate over-allocation - confirm it always covers the
                                * header for very small cluster counts.
                                */
    9877           8 :         ctx->new_used_clusters_mask = calloc(1, total_clusters);
    9878           8 :         if (!ctx->new_used_clusters_mask) {
    9879           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9880           0 :                 return;
    9881             :         }
    9882           8 :         ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
    9883           8 :         if (!ctx->new_used_clusters) {
                                       /* The mask buffer is not freed here; presumably bs_grow_live_done() does - confirm. */
    9884           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9885           0 :                 return;
    9886             :         }
    9887             : 
                               /* Mark the super block not clean and record the new geometry before writing it. */
    9888           8 :         ctx->super->clean = 0;
    9889           8 :         ctx->super->size = dev_size;
    9890           8 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9891           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
    9892             : }
    9893             : 
    /*
     * Start growing a loaded blobstore.
     *
     * Must be called on the blobstore's metadata thread (asserted). Allocates a grow context
     * and a super block buffer (spdk_zmalloc with SPDK_MALLOC_DMA), starts a sequence on the
     * metadata channel and reads the super block from page 0; processing continues in
     * bs_grow_live_load_super_cpl().
     *
     * bs:     blobstore to grow
     * cb_fn:  completion callback; called directly with -ENOMEM if an allocation here fails
     * cb_arg: argument handed to cb_fn
     */
    9894             : void
    9895          32 : spdk_bs_grow_live(struct spdk_blob_store *bs,
    9896             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    9897             : {
    9898          32 :         struct spdk_bs_cpl      cpl;
    9899             :         struct spdk_bs_grow_ctx *ctx;
    9900             : 
    9901          32 :         assert(spdk_get_thread() == bs->md_thread);
    9902             : 
    9903          32 :         SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
    9904             : 
                               /*
                                * cpl lives on the stack and is passed by address to bs_sequence_start_bs()
                                * below - presumably copied there; confirm.
                                */
    9905          32 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    9906          32 :         cpl.u.bs_basic.cb_fn = cb_fn;
    9907          32 :         cpl.u.bs_basic.cb_arg = cb_arg;
    9908             : 
    9909          32 :         ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
    9910          32 :         if (!ctx) {
    9911           0 :                 cb_fn(cb_arg, -ENOMEM);
    9912           0 :                 return;
    9913             :         }
    9914          32 :         ctx->bs = bs;
    9915             : 
    9916          32 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    9917             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    9918          32 :         if (!ctx->super) {
    9919           0 :                 free(ctx);
    9920           0 :                 cb_fn(cb_arg, -ENOMEM);
    9921           0 :                 return;
    9922             :         }
    9923             : 
    9924          32 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9925          32 :         if (!ctx->seq) {
                                       /* Release in reverse order of allocation before reporting the failure. */
    9926           0 :                 spdk_free(ctx->super);
    9927           0 :                 free(ctx);
    9928           0 :                 cb_fn(cb_arg, -ENOMEM);
    9929           0 :                 return;
    9930             :         }
    9931             : 
    9932             :         /* Read the super block */
    9933          32 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    9934          32 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    9935             :                              bs_grow_live_load_super_cpl, ctx);
    9936             : }
    9937             : 
    /*
     * Allocate a blobstore for dev and read its super block; processing continues in
     * bs_grow_load_super_cpl().
     *
     * dev:    block device holding the blobstore; its block length must divide
     *         SPDK_BS_PAGE_SIZE. It is destroyed here on the failure paths that call
     *         dev->destroy() before reporting the error.
     * o:      optional options; when non-NULL they are copied over the defaults set by
     *         spdk_bs_opts_init()
     * cb_fn:  completion callback; on a failure detected here it is called with a NULL
     *         blobstore handle and a negative errno
     * cb_arg: argument handed to cb_fn
     */
    9938             : void
    9939           4 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    9940             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    9941             : {
    9942           4 :         struct spdk_blob_store  *bs;
    9943           4 :         struct spdk_bs_cpl      cpl;
    9944           4 :         struct spdk_bs_load_ctx *ctx;
    9945           4 :         struct spdk_bs_opts     opts = {};
    9946             :         int err;
    9947             : 
    9948           4 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    9949             : 
    9950           4 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    9951           0 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    9952           0 :                 dev->destroy(dev);
    9953           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    9954           0 :                 return;
    9955             :         }
    9956             : 
    9957           4 :         spdk_bs_opts_init(&opts, sizeof(opts));
    9958           4 :         if (o) {
    9959           4 :                 if (bs_opts_copy(o, &opts)) {
                                               /*
                                                * NOTE(review): unlike every other failure path in this
                                                * function, this one returns without calling cb_fn and
                                                * without destroying dev, so the caller is never notified.
                                                * Looks like a bug - consider dev->destroy(dev) followed by
                                                * cb_fn(cb_arg, NULL, -EINVAL).
                                                */
    9960           0 :                         return;
    9961             :                 }
    9962             :         }
    9963             : 
    9964           4 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    9965           0 :                 dev->destroy(dev);
    9966           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    9967           0 :                 return;
    9968             :         }
    9969             : 
    9970           4 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    9971           4 :         if (err) {
    9972           0 :                 dev->destroy(dev);
    9973           0 :                 cb_fn(cb_arg, NULL, err);
    9974           0 :                 return;
    9975             :         }
    9976             : 
    9977           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    9978           4 :         cpl.u.bs_handle.cb_fn = cb_fn;
    9979           4 :         cpl.u.bs_handle.cb_arg = cb_arg;
    9980           4 :         cpl.u.bs_handle.bs = bs;
    9981             : 
    9982           4 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9983           4 :         if (!ctx->seq) {
                                       /*
                                        * dev is not destroyed explicitly on this path; presumably bs_free()
                                        * takes care of it - confirm.
                                        */
    9984           0 :                 spdk_free(ctx->super);
    9985           0 :                 free(ctx);
    9986           0 :                 bs_free(bs);
    9987           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9988           0 :                 return;
    9989             :         }
    9990             : 
    9991             :         /* Read the super block */
    9992           4 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    9993           4 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    9994             :                              bs_grow_load_super_cpl, ctx);
    9995             : }
    9996             : 
    /*
     * Fetch the external snapshot ID stored on an esnap clone.
     *
     * Returns -EINVAL when blob is not an esnap clone; otherwise returns whatever
     * blob_get_xattr_value() returns for the BLOB_EXTERNAL_SNAPSHOT_ID xattr, with the value
     * pointer and length reported through id and len.
     */
    9997             : int
    9998          24 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
    9999             : {
   10000          24 :         if (!blob_is_esnap_clone(blob)) {
   10001          12 :                 return -EINVAL;
   10002             :         }
   10003             : 
                               /* The trailing `true` presumably selects the internal xattr namespace - confirm. */
   10004          12 :         return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
   10005             : }
   10006             : 
   /*
    * Return the back_bs_dev I/O channel that blob uses on the blobstore channel ch.
    *
    * Channels are cached per blobstore channel in the esnap_channels tree, keyed by blob ID.
    * On a cache miss a new channel is created with back_bs_dev->create_channel() and inserted
    * into the tree.
    *
    * Returns the channel, or NULL if the tree node or the channel could not be allocated.
    */
   10007             : struct spdk_io_channel *
   10008        8840 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
   10009             : {
   10010        8840 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(ch);
   10011        8840 :         struct spdk_bs_dev              *bs_dev = blob->back_bs_dev;
   10012        8840 :         struct blob_esnap_channel       find = {};
   10013             :         struct blob_esnap_channel       *esnap_channel, *existing;
   10014             : 
                               /* Fast path: look the blob up in this channel's cache. */
   10015        8840 :         find.blob_id = blob->id;
   10016        8840 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10017        8840 :         if (spdk_likely(esnap_channel != NULL)) {
   10018        8796 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
   10019             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10020        8796 :                 return esnap_channel->channel;
   10021             :         }
   10022             : 
   10023          44 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
   10024             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
   10025             : 
   10026          44 :         esnap_channel = calloc(1, sizeof(*esnap_channel));
   10027          44 :         if (esnap_channel == NULL) {
   10028           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
   10029             :                                find.blob_id);
   10030           0 :                 return NULL;
   10031             :         }
   10032          44 :         esnap_channel->channel = bs_dev->create_channel(bs_dev);
   10033          44 :         if (esnap_channel->channel == NULL) {
   10034           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
   10035           0 :                 free(esnap_channel);
   10036           0 :                 return NULL;
   10037             :         }
   10038          44 :         esnap_channel->blob_id = find.blob_id;
   10039          44 :         existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10040          44 :         if (spdk_unlikely(existing != NULL)) {
   10041             :                 /*
   10042             :                  * This should be unreachable: all modifications to this tree happen on this thread.
   10043             :                  */
                                       /*
                                        * NOTE(review): the format string below has no separator between the
                                        * blob ID and "lost race", so the two run together in the log output.
                                        */
   10044           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
   10045           0 :                 assert(false);
   10046             : 
                                       /*
                                        * Only reached when assert() is compiled out: drop the channel just
                                        * created and hand back the one already in the tree.
                                        */
   10047             :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10048             :                 free(esnap_channel);
   10049             : 
   10050             :                 return existing->channel;
   10051             :         }
   10052             : 
   10053          44 :         return esnap_channel->channel;
   10054             : }
   10055             : 
   /*
    * Order two esnap channel entries by blob ID: returns -1 if c1 sorts first, 1 if c2 sorts
    * first and 0 if the IDs are equal. Presumably the comparator of blob_esnap_channel_tree -
    * the tree definition is not visible here.
    */
   10056             : static int
   10057        8816 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
   10058             : {
   10059        8816 :         return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
   10060             : }
   10061             : 
   /*
    * State carried through one blob_esnap_destroy_bs_dev_channels() iteration.
    *
    * cb_fn / cb_arg: optional completion callback (may be NULL) and its argument
    * blob:           blob whose esnap channels are being destroyed
    * back_bs_dev:    blob->back_bs_dev as it was when the iteration started; used to destroy
    *                 the per-thread channels
    * abort_io:       when true, queued user ops bound to a destroyed channel are aborted
    *                 with -EIO
    */
   10062             : struct blob_esnap_destroy_ctx {
   10063             :         spdk_blob_op_with_handle_complete       cb_fn;
   10064             :         void                                    *cb_arg;
   10065             :         struct spdk_blob                        *blob;
   10066             :         struct spdk_bs_dev                      *back_bs_dev;
   10067             :         bool                                    abort_io;
   10068             : };
   10069             : 
   /*
    * Completion callback of the spdk_for_each_channel() iteration started by
    * blob_esnap_destroy_bs_dev_channels().
    *
    * Invokes the caller's callback (if any) with the iteration status, frees the context and
    * decrements bs->esnap_channels_unloading. When that counter reaches zero and
    * bs->esnap_unload_cb_fn is set, spdk_bs_unload() is called with the stored callback.
    */
   10070             : static void
   10071         136 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
   10072             : {
   10073         136 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10074         136 :         struct spdk_blob                *blob = ctx->blob;
                               /* bs is read up front: it is still needed after ctx has been freed below. */
   10075         136 :         struct spdk_blob_store          *bs = blob->bs;
   10076             : 
   10077         136 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
   10078             :                       blob->id);
   10079             : 
   10080         136 :         if (ctx->cb_fn != NULL) {
   10081         124 :                 ctx->cb_fn(ctx->cb_arg, blob, status);
   10082             :         }
   10083         136 :         free(ctx);
   10084             : 
   10085         136 :         bs->esnap_channels_unloading--;
   10086         136 :         if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
   10087           4 :                 spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
   10088             :         }
   10089         136 : }
   10090             : 
   /*
    * Per-channel step of the iteration started by blob_esnap_destroy_bs_dev_channels().
    *
    * Runs on the thread that owns the blobstore channel (asserted). If that channel has a
    * cached esnap channel for ctx->blob, the entry is removed from the tree, queued user ops
    * using it are optionally aborted with -EIO, and the channel is destroyed through
    * ctx->back_bs_dev. The iteration always continues with status 0.
    */
   10091             : static void
   10092         144 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
   10093             : {
   10094         144 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10095         144 :         struct spdk_blob                *blob = ctx->blob;
   10096         144 :         struct spdk_bs_dev              *bs_dev = ctx->back_bs_dev;
   10097         144 :         struct spdk_io_channel          *channel = spdk_io_channel_iter_get_channel(i);
   10098         144 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(channel);
   10099             :         struct blob_esnap_channel       *esnap_channel;
   10100         144 :         struct blob_esnap_channel       find = {};
   10101             : 
   10102         144 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
   10103             : 
   10104         144 :         find.blob_id = blob->id;
   10105         144 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10106         144 :         if (esnap_channel != NULL) {
   10107          12 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
   10108             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10109          12 :                 RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10110             : 
   10111          12 :                 if (ctx->abort_io) {
   10112             :                         spdk_bs_user_op_t *op, *tmp;
   10113             : 
                                               /* Abort only the queued ops that target the channel being destroyed. */
   10114           8 :                         TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
   10115           0 :                                 if (op->back_channel == esnap_channel->channel) {
   10116           0 :                                         TAILQ_REMOVE(&bs_channel->queued_io, op, link);
   10117           0 :                                         bs_user_op_abort(op, -EIO);
   10118             :                                 }
   10119             :                         }
   10120             :                 }
   10121             : 
   10122          12 :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10123          12 :                 free(esnap_channel);
   10124             :         }
   10125             : 
   10126         144 :         spdk_for_each_channel_continue(i, 0);
   10127         144 : }
   10128             : 
   10129             : /*
   10130             :  * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
   10131             :  * used when closing an esnap clone blob and after decoupling from the parent.
                       *
                       * If blob is not an esnap clone or has no back_bs_dev there is nothing to destroy and cb_fn
                       * (when non-NULL) is called immediately with 0. If the context cannot be allocated, cb_fn
                       * (when non-NULL) is called with -ENOMEM. Otherwise bs->esnap_channels_unloading is
                       * incremented and the per-channel iteration is started; cb_fn is then called from
                       * blob_esnap_destroy_channels_done().
                       *
                       * abort_io: when true, queued user ops bound to a destroyed channel are aborted with -EIO.
   10132             :  */
   10133             : static void
   10134         480 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
   10135             :                                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
   10136             : {
   10137             :         struct blob_esnap_destroy_ctx   *ctx;
   10138             : 
   10139         480 :         if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
   10140         344 :                 if (cb_fn != NULL) {
   10141         344 :                         cb_fn(cb_arg, blob, 0);
   10142             :                 }
   10143         344 :                 return;
   10144             :         }
   10145             : 
   10146         136 :         ctx = calloc(1, sizeof(*ctx));
   10147         136 :         if (ctx == NULL) {
   10148           0 :                 if (cb_fn != NULL) {
   10149           0 :                         cb_fn(cb_arg, blob, -ENOMEM);
   10150             :                 }
   10151           0 :                 return;
   10152             :         }
   10153         136 :         ctx->cb_fn = cb_fn;
   10154         136 :         ctx->cb_arg = cb_arg;
   10155         136 :         ctx->blob = blob;
                               /* Remember the current device: the per-channel step destroys channels through it. */
   10156         136 :         ctx->back_bs_dev = blob->back_bs_dev;
   10157         136 :         ctx->abort_io = abort_io;
   10158             : 
   10159         136 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
   10160             :                       blob->id);
   10161             : 
   10162         136 :         blob->bs->esnap_channels_unloading++;
   10163         136 :         spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
   10164             :                               blob_esnap_destroy_channels_done);
   10165             : }
   10166             : 
   10167             : /*
   10168             :  * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
   10169             :  * bs_channel is destroyed.
                       *
                       * Must run on the thread that owns ch (asserted). Every entry is removed from the
                       * esnap_channels tree, its channel is released and the entry is freed.
                       *
                       * NOTE(review): channels are released here with spdk_put_io_channel(), whereas
                       * blob_esnap_destroy_one_channel() goes through bs_dev->destroy_channel(). No bs_dev is
                       * available in this function; confirm the two are equivalent for every back_bs_dev type.
   10170             :  */
   10171             : static void
   10172        1025 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
   10173             : {
   10174             :         struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
   10175             : 
   10176        1025 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
   10177             : 
   10178        1025 :         SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
   10179             :                       spdk_thread_get_name(spdk_get_thread()));
   10180        1057 :         RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
   10181             :                         esnap_channel_tmp) {
   10182          32 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
   10183             :                               ": destroying one channel in thread %s\n",
   10184             :                               esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
   10185          32 :                 RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
   10186          32 :                 spdk_put_io_channel(esnap_channel->channel);
   10187          32 :                 free(esnap_channel);
   10188             :         }
   10189        1025 :         SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
   10190             :                       spdk_thread_get_name(spdk_get_thread()));
   10191        1025 : }
   10192             : 
   /*
    * Completion callback of blob_unfreeze_io() at the end of a back_bs_dev replacement.
    *
    * An unfreeze failure is only logged; the status reported to the caller's callback is
    * ctx->bserrno, i.e. the outcome recorded before the unfreeze. Frees the context.
    */
   10193             : static void
   10194          28 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
   10195             : {
   10196          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10197             : 
   10198          28 :         if (bserrno != 0) {
   10199             :                 /* Even though the unfreeze failed, the update may have succeeded. */
   10200           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
   10201             :                             bserrno);
   10202             :         }
   10203          28 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
   10204          28 :         free(ctx);
   10205          28 : }
   10206             : 
   /*
    * Callback run after blob_esnap_destroy_bs_dev_channels() has finished for a blob that is
    * getting a new back_bs_dev (see blob_set_back_bs_dev_frozen()).
    *
    * On success it destroys the old back_bs_dev (if any), runs the optional
    * parent_refs_cb_fn, installs ctx->back_bs_dev as the blob's back_bs_dev and unfreezes
    * I/O. On any failure the error is stored in ctx->bserrno and I/O is unfrozen; the caller
    * is notified from blob_set_back_bs_dev_done() in every case.
    */
   10207             : static void
   10208          28 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
   10209             : {
   10210          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10211             :         int rc;
   10212             : 
   10213          28 :         if (bserrno != 0) {
   10214           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
   10215             :                             blob->id, bserrno);
   10216           0 :                 ctx->bserrno = bserrno;
   10217           0 :                 blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10218           0 :                 return;
   10219             :         }
   10220             : 
   10221          28 :         if (blob->back_bs_dev != NULL) {
   10222          28 :                 blob->back_bs_dev->destroy(blob->back_bs_dev);
   10223          28 :                 blob->back_bs_dev = NULL;
   10224             :         }
   10225             : 
   10226          28 :         if (ctx->parent_refs_cb_fn) {
   10227          20 :                 rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
   10228          20 :                 if (rc != 0) {
                                               /*
                                                * NOTE(review): at this point the old device has already been
                                                * destroyed and blob->back_bs_dev is left NULL; ctx->back_bs_dev
                                                * is not released here. Confirm who owns it on this path.
                                                */
   10229           0 :                         ctx->bserrno = rc;
   10230           0 :                         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10231           0 :                         return;
   10232             :                 }
   10233             :         }
   10234             : 
   10235          28 :         SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
   10236          28 :         blob->back_bs_dev = ctx->back_bs_dev;
   10237          28 :         ctx->bserrno = 0;
   10238             : 
   10239          28 :         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10240             : }
   10241             : 
   /*
    * Step of a back_bs_dev replacement that runs once the freeze attempt has completed.
    *
    * If the freeze failed, the error is reported to the caller's callback and the context is
    * freed. Otherwise the blob's esnap channels are destroyed on every thread, aborting
    * queued I/O (abort_io is passed as true), and the sequence continues in
    * blob_frozen_set_back_bs_dev().
    */
   10242             : static void
   10243          28 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
   10244             : {
   10245          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10246          28 :         struct spdk_blob        *blob = ctx->blob;
   10247             : 
   10248          28 :         if (bserrno != 0) {
   10249           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
   10250             :                             bserrno);
   10251           0 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
   10252           0 :                 free(ctx);
   10253           0 :                 return;
   10254             :         }
   10255             : 
   10256             :         /*
   10257             :          * This does not prevent future reads from the esnap device because any future IO will
   10258             :          * lazily create a new esnap IO channel.
   10259             :          */
   10260          28 :         blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
   10261             : }
   10262             : 
   /*
    * Replace the external snapshot device of an esnap clone.
    *
    * If blob is not an esnap clone, cb_fn is called with -EINVAL. Otherwise the work is
    * delegated to blob_set_back_bs_dev(); the two NULL arguments are presumably the optional
    * parent-refs callback and its argument - confirm against blob_set_back_bs_dev().
    */
   10263             : void
   10264           8 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
   10265             :                            spdk_blob_op_complete cb_fn, void *cb_arg)
   10266             : {
   10267           8 :         if (!blob_is_esnap_clone(blob)) {
   10268           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10269           0 :                 cb_fn(cb_arg, -EINVAL);
   10270           0 :                 return;
   10271             :         }
   10272             : 
   10273           8 :         blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
   10274             : }
   10275             : 
   /*
    * Return the back_bs_dev of an esnap clone.
    *
    * Logs an error and returns NULL when blob is not an esnap clone. The returned pointer is
    * blob->back_bs_dev as-is, so it may also be NULL for an esnap clone.
    */
   10276             : struct spdk_bs_dev *
   10277           4 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
   10278             : {
   10279           4 :         if (!blob_is_esnap_clone(blob)) {
   10280           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10281           0 :                 return NULL;
   10282             :         }
   10283             : 
   10284           4 :         return blob->back_bs_dev;
   10285             : }
   10286             : 
   /*
    * Report whether blob is backed by a degraded device.
    *
    * Returns true if the blobstore's own device provides is_degraded() and it reports true.
    * Otherwise returns false when the blob has no back_bs_dev or that device provides no
    * is_degraded(), and the back_bs_dev's is_degraded() result in all other cases.
    */
   10287             : bool
   10288          28 : spdk_blob_is_degraded(const struct spdk_blob *blob)
   10289             : {
   10290          28 :         if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
   10291           4 :                 return true;
   10292             :         }
   10293          24 :         if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
   10294          12 :                 return false;
   10295             :         }
   10296             : 
   10297          12 :         return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
   10298             : }
   10299             : 
   /* Register the two log components named in this file's SPDK_DEBUGLOG() calls. */
   10300           3 : SPDK_LOG_REGISTER_COMPONENT(blob)
   10301           3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap)
   10302             : 
   10303             : 
   /*
    * Trace registration for the "blob" group (TRACE_GROUP_BLOB).
    *
    * Registers the OBJECT_BLOB_CB_ARG object type, the BLOB_PROCESS_START and
    * BLOB_PROCESS_COMPLETE tracepoints (each carrying one pointer argument named "ctx"), and
    * relates the TRACE_BDEV_IO_START / TRACE_BDEV_IO_DONE tracepoints to OBJECT_BLOB_CB_ARG.
    */
   10304           3 : SPDK_TRACE_REGISTER_FN(blob_trace, "blob", TRACE_GROUP_BLOB)
   10305             : {
                               /*
                                * The integer after the object type (1 for START, 0 for COMPLETE) is presumably
                                * the "new object" flag of struct spdk_trace_tpoint_opts - confirm.
                                */
   10306           0 :         struct spdk_trace_tpoint_opts opts[] = {
   10307             :                 {
   10308             :                         "BLOB_PROCESS_START", TRACE_BLOB_PROCESS_START,
   10309             :                         OWNER_TYPE_NONE, OBJECT_BLOB_CB_ARG, 1,
   10310             :                         {
   10311             :                                 { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }
   10312             :                         }
   10313             :                 },
   10314             :                 {
   10315             :                         "BLOB_PROCESS_COMPLETE", TRACE_BLOB_PROCESS_COMPLETE,
   10316             :                         OWNER_TYPE_NONE, OBJECT_BLOB_CB_ARG, 0,
   10317             :                         {
   10318             :                                 { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }
   10319             :                         }
   10320             :                 },
   10321             :         };
   10322             : 
   10323             : 
   10324           0 :         spdk_trace_register_object(OBJECT_BLOB_CB_ARG, 'a');
   10325           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
   10326           0 :         spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_START, OBJECT_BLOB_CB_ARG, 1);
   10327           0 :         spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_DONE, OBJECT_BLOB_CB_ARG, 0);
   10328           0 : }

Generated by: LCOV version 1.15